File Coverage

Bio/DB/GFF/Adaptor/memory.pm
Criterion Covered Total %
statement 304 384 79.1
branch 104 170 61.1
condition 56 121 46.2
subroutine 32 36 88.8
pod 13 17 76.4
total 509 728 69.9


line stmt bran cond sub pod time code
1             package Bio::DB::GFF::Adaptor::memory;
2              
3             =head1 NAME
4              
5             Bio::DB::GFF::Adaptor::memory -- Bio::DB::GFF database adaptor for in-memory databases
6              
7             =head1 SYNOPSIS
8              
9             use Bio::DB::GFF;
10             my $db = Bio::DB::GFF->new(-adaptor=> 'memory',
11             -gff => 'my_features.gff',
12             -fasta => 'my_dna.fa'
13             );
14              
15             or
16              
17             my $db = Bio::DB::GFF->new(-adaptor=>'memory');
18             $db->load_gff_file('my_features.gff');
19             $db->load_fasta_file('my_dna.fa');
20              
21             See L for other methods.
22              
23             =head1 DESCRIPTION
24              
25             This adaptor implements an in-memory version of Bio::DB::GFF. It can be used to
26             store and retrieve SHORT GFF files. It inherits from Bio::DB::GFF.
27              
28             =head1 CONSTRUCTOR
29              
30             Use Bio::DB::GFF-Enew() to construct new instances of this class.
31             Three named arguments are recommended:
32              
33             Argument Description
34              
35             -adaptor Set to "memory" to create an instance of this class.
36             -gff Read the indicated file or directory of .gff file.
37             -fasta Read the indicated file or directory of fasta files.
38             -dir Indicates a directory containing .gff and .fa files
39              
40             If you use the -dir option and the indicated directory is writable by
41             the current process, then this library will create a FASTA file index
42             that greatly diminishes the memory usage of this module.
43              
44             Alternatively you may create an empty in-memory object using just the
45             -adaptor=E'memory' argument and then call the load_gff_file() and
46             load_fasta_file() methods to load GFF and/or sequence
47             information. This is recommended in CGI/mod_perl/fastCGI environments
48             because these methods do not modify STDIN, unlike the constructor.
49              
50             =head1 METHODS
51              
52             See L for inherited methods.
53              
54             =head1 BUGS
55              
56             none ;-)
57              
58             =head1 SEE ALSO
59              
60             L, L
61              
62             =head1 AUTHOR
63              
64             Shuly Avraham Eavraham@cshl.orgE.
65              
66             Copyright (c) 2002 Cold Spring Harbor Laboratory.
67              
68             This library is free software; you can redistribute it and/or modify
69             it under the same terms as Perl itself.
70              
71             =cut
72              
73 3     3   15 use strict;
  3         3  
  3         96  
74             # AUTHOR: Shulamit Avraham
75             # This module needs to be cleaned up and documented
76              
77             # Bio::DB::GFF::Adaptor::memory -- in-memory db adaptor
78             # implements the low level handling of data which stored in memory.
79             # This adaptor implements a specific in memory schema that is compatible with Bio::DB::GFF.
80             # Inherits from Bio::DB::GFF.
81              
82              
83 3     3   9 use Bio::DB::GFF::Util::Rearrange; # for rearrange()
  3         3  
  3         144  
84 3     3   1575 use Bio::DB::GFF::Adaptor::memory::iterator;
  3         9  
  3         87  
85 3     3   15 use File::Basename 'dirname';
  3         3  
  3         201  
86 3     3   1554 use Bio::DB::GFF::Adaptor::memory::feature_serializer qw(@hash2array_map);
  3         6  
  3         390  
87              
88              
89 3     3   12 use constant MAX_SEGMENT => 1_000_000_000; # the largest a segment can get
  3         3  
  3         135  
90              
91 3     3   18 use base qw(Bio::DB::GFF);
  3         3  
  3         8685  
92              
93             sub new {
94 5     5 1 10 my $class = shift ;
95 5         35 my ($file,$fasta,$dbdir,$preferred_groups) = rearrange([
96             [qw(GFF FILE)],
97             'FASTA',
98             [qw(DSN DB DIR DIRECTORY)],
99             'PREFERRED_GROUPS',
100             ],@_);
101              
102             # fill in object
103 5         23 my $self = bless{ data => [] },$class;
104 5 50       14 $self->preferred_groups($preferred_groups) if defined $preferred_groups;
105 5   33     27 $file ||= $dbdir;
106 5   33     21 $fasta ||= $dbdir;
107 5 50       13 $self->load_gff($file) if $file;
108 5 50       10 $self->load_or_store_fasta($fasta) if $fasta;
109 5         10 return $self;
110             }
111              
112             sub load_or_store_fasta {
113 0     0 0 0 my $self = shift;
114 0         0 my $fasta = shift;
115 0 0 0     0 if ((-f $fasta && -w dirname($fasta))
      0        
      0        
116             or
117             (-d $fasta && -w $fasta)) {
118 0         0 require Bio::DB::Fasta;
119 0 0       0 my $dna_db = eval {Bio::DB::Fasta->new($fasta);}
  0         0  
120             or warn "$@\nCan't open sequence file(s). Use -gff instead of -dir if you wish to load features without sequence.\n";
121 0 0       0 $dna_db && $self->dna_db($dna_db);
122             } else {
123 0         0 $self->load_fasta($fasta);
124             }
125             }
126              
127             sub dna_db {
128 120     120 0 99 my $self = shift;
129 120         115 my $d = $self->{dna_db};
130 120 50       184 $self->{dna_db} = shift if @_;
131 120         238 $d;
132             }
133              
134             sub insert_sequence {
135 2620     2620 0 1917 my $self = shift;
136 2620         2408 my($id,$offset,$seq) = @_;
137 2620         6711 $self->{dna}{$id} .= $seq;
138             }
139              
140             # low-level fetch of a DNA substring given its
141             # name, class and the desired range.
142             sub get_dna {
143 120     120 1 96 my $self = shift;
144 120         132 my ($id,$start,$stop,$class) = @_;
145 120 50       168 if (my $dna_db = $self->dna_db) {
146 0         0 return $dna_db->seq($id,$start=>$stop);
147             }
148 120 50       193 return '' unless $self->{dna};
149              
150 120 100 66     246 return $self->{dna}{$id} unless defined $start || defined $stop;
151 115 50       147 $start = 1 if !defined $start;
152              
153 115         83 my $reversed = 0;
154 115 100       188 if ($start > $stop) {
155 55         44 $reversed++;
156 55         68 ($start,$stop) = ($stop,$start);
157             }
158 115         611 my $dna = substr($self->{dna}{$id},$start-1,$stop-$start+1);
159 115 100       162 if ($reversed) {
160 55         109 $dna =~ tr/gatcGATC/ctagCTAG/;
161 55         92 $dna = reverse $dna;
162             }
163              
164 115         396 $dna;
165             }
166              
167             sub setup_load {
168 5     5 1 5 my $self = shift;
169 5         9 $self->{tmp} = {};
170 5         10 $self->{data} = [];
171 5         7 1;
172             }
173              
174             sub finish_load {
175 5     5 1 7 my $self = shift;
176 5         11 my $idx = 0;
177 5         5 foreach my $arrayref (values %{$self->{tmp}}) {
  5         28  
178 92         80 foreach (@$arrayref) {$_->{feature_id} = $idx++; }
  175         135  
179 92         49 push @{$self->{data}},@$arrayref;
  92         105  
180             }
181 5         9 1;
182             }
183              
184             # this method loads the feature as a hash into memory -
185             # keeps an array of features-hashes as an in-memory db
186             sub load_gff_line {
187 175     175 1 133 my $self = shift;
188 175         114 my $feature_hash = shift;
189 175 50 66     512 $feature_hash->{strand} = '' if $feature_hash->{strand} && $feature_hash->{strand} eq '.';
190 175 50 66     294 $feature_hash->{phase} = '' if $feature_hash->{phase} && $feature_hash->{phase} eq '.';
191 175 50       277 $feature_hash->{gclass} = 'Sequence' unless length $feature_hash->{gclass} > 0;
192             # sort by group please
193 175         111 push @{$self->{tmp}{$feature_hash->{gclass},$feature_hash->{gname}}},$feature_hash;
  175         521  
194             }
195              
196             # given sequence name, return (reference,start,stop,strand)
197             sub get_abscoords {
198 193     193 1 172 my $self = shift;
199 193         199 my ($name,$class,$refseq) = @_;
200 193         156 my %refs;
201             my $regexp;
202            
203 193 50       442 if ($name =~ /[*?]/) { # uh oh regexp time
204 0         0 $name = quotemeta($name);
205 0         0 $name =~ s/\\\*/.*/g;
206 0         0 $name =~ s/\\\?/.?/g;
207 0         0 $regexp++;
208             }
209              
210             # Find all features that have the requested name and class.
211             # Sort them by reference point.
212 193         151 for my $feature (@{$self->{data}}) {
  193         402  
213              
214 6560         3627 my $no_match_class_name;
215             my $empty_class_name;
216             my $class_matches = !defined($feature->{gclass}) ||
217             length($feature->{gclass}) == 0 ||
218 6560   66     23402 $feature->{gclass} eq $class;
219              
220 6560 50       6645 if (defined $feature->{gname}) {
221             my $matches = $class_matches
222 6560   66     8452 && ($regexp ? $feature->{gname} =~ /$name/i : lc($feature->{gname}) eq lc($name));
223 6560         4423 $no_match_class_name = !$matches; # to accomodate Shuly's interesting logic
224             }
225              
226             else{
227 0         0 $empty_class_name = 1;
228             }
229              
230 6560 100       6867 if ($no_match_class_name){
231 6170         4461 my $feature_attributes = $feature->{attributes};
232 6170         7256 my $attributes = {Alias => $name};
233 6170 100       6380 if (!$self->_matching_attributes($feature_attributes,$attributes)){
234 6165         7514 next;
235             }
236             }
237              
238 395         316 push @{$refs{$feature->{ref}}},$feature;
  395         694  
239             }
240              
241             # find out how many reference points we recovered
242 193 100       636 if (! %refs) {
243 8         55 $self->error("$name not found in database");
244 8         45 return;
245             }
246              
247             # compute min and max
248 185         298 my ($ref) = keys %refs;
249 185         151 my @found = @{$refs{$ref}};
  185         326  
250 185         140 my ($strand,$start,$stop);
251              
252 0         0 my @found_segments;
253 185         252 foreach my $ref (keys %refs) {
254 185 50 33     285 next if defined($refseq) and lc($ref) ne lc($refseq);
255 185         128 my @found = @{$refs{$ref}};
  185         245  
256 185         112 my ($strand,$start,$stop,$name);
257 185         198 foreach (@found) {
258 395   100     732 $strand ||= $_->{strand};
259 395 50 66     894 $strand = '+' if $strand && $strand eq '.';
260 395 100 66     941 $start = $_->{start} if !defined($start) || $start > $_->{start};
261 395 100 66     852 $stop = $_->{stop} if !defined($stop) || $stop < $_->{stop};
262 395   66     757 $name ||= $_->{gname};
263             }
264 185         530 push @found_segments,[$ref,$class,$start,$stop,$strand,$name];
265              
266             }
267              
268 185         799 return \@found_segments;
269             }
270              
271             sub search_notes {
272 0     0 1 0 my $self = shift;
273 0         0 my ($search_string,$limit) = @_;
274              
275 0         0 $search_string =~ tr/*?//d;
276              
277 0         0 my @results;
278 0         0 my @words = map {quotemeta($_)} $search_string =~ /(\w+)/g;
  0         0  
279 0         0 my $search = join '|',@words;
280              
281 0         0 for my $feature (@{$self->{data}}) {
  0         0  
282 0 0 0     0 next unless defined $feature->{gclass} && defined $feature->{gname}; # ignore NULL objects
283 0 0       0 next unless $feature->{attributes};
284 0         0 my @attributes = @{$feature->{attributes}};
  0         0  
285 0         0 my @values = map {$_->[1]} @attributes;
  0         0  
286 0         0 my $value = "@values";
287 0         0 my $matches = 0;
288 0         0 for my $w (@words) {
289 0         0 my @hits = $value =~ /($w)/ig;
290 0         0 $matches += @hits;
291             }
292 0 0       0 next unless $matches;
293              
294 0         0 my $relevance = 10 * $matches;
295 0         0 my $featname = Bio::DB::GFF::Featname->new($feature->{gclass}=>$feature->{gname});
296 0         0 my $note;
297 0         0 $note = join ' ',map {$_->[1]} grep {$_->[0] eq 'Note'} @{$feature->{attributes}};
  0         0  
  0         0  
  0         0  
298 0         0 $note .= join ' ',grep /$search/,map {$_->[1]} grep {$_->[0] ne 'Note'} @{$feature->{attributes}};
  0         0  
  0         0  
  0         0  
299 0         0 my $type = Bio::DB::GFF::Typename->new($feature->{method},$feature->{source});
300 0         0 push @results,[$featname,$note,$relevance,$type];
301 0 0 0     0 last if defined $limit && @results >= $limit;
302             }
303              
304             #added result filtering so that this method returns the expected results
305             #this section of code used to be in GBrowse's do_keyword_search method
306              
307 0         0 my $match_sub = 'sub {';
308 0         0 foreach (split /\s+/,$search_string) {
309 0         0 $match_sub .= "return unless \$_[0] =~ /\Q$_\E/i; ";
310             }
311 0         0 $match_sub .= "};";
312 0         0 my $match = eval $match_sub;
313              
314 0         0 my @matches = grep { $match->($_->[1]) } @results;
  0         0  
315              
316 0         0 return @matches;
317             }
318              
319             sub _delete_features {
320 20     20   18 my $self = shift;
321 20         57 my @feature_ids = sort {$b<=>$a} @_;
  78         93  
322 20         18 my $removed = 0;
323 20         27 foreach (@feature_ids) {
324 75 50 33     117 next unless $_ >= 0 && $_ < @{$self->{data}};
  75         178  
325 75         53 $removed += defined splice(@{$self->{data}},$_,1);
  75         130  
326             }
327 20         69 $removed;
328             }
329              
330             sub _delete {
331 21     21   23 my $self = shift;
332 21         26 my $delete_spec = shift;
333 21   50     49 my $ranges = $delete_spec->{segments} || [];
334 21   50     40 my $types = $delete_spec->{types} || [];
335 21         25 my $force = $delete_spec->{force};
336 21         19 my $range_type = $delete_spec->{range_type};
337              
338 21         22 my $deleted = 0;
339 21 100       51 if (@$ranges) {
    100          
340 10 100       23 my @args = @$types ? (-type=>$types) : ();
341 10         17 push @args,(-range_type => $range_type);
342 10         18 my %ids_to_remove = map {$_->id => 1} map {$_->features(@args)} @$ranges;
  40         60  
  10         18  
343 10         28 $deleted = $self->delete_features(keys %ids_to_remove);
344             } elsif (@$types) {
345 5         18 my %ids_to_remove = map {$_->id => 1} $self->features(-type=>$types);
  20         28  
346 5         13 $deleted = $self->delete_features(keys %ids_to_remove);
347             } else {
348 6 100       55 $self->throw("This operation would delete all feature data and -force not specified")
349             unless $force;
350 3         6 $deleted = @{$self->{data}};
  3         15  
351 3         8 @{$self->{data}} = ();
  3         45  
352             }
353 18         102 $deleted;
354             }
355              
356             # attributes -
357              
358             # Some GFF version 2 files use the groups column to store a series of
359             # attribute/value pairs. In this interpretation of GFF, the first such
360             # pair is treated as the primary group for the feature; subsequent pairs
361             # are treated as attributes. Two attributes have special meaning:
362             # "Note" is for backward compatibility and is used for unstructured text
363             # remarks. "Alias" is considered as a synonym for the feature name.
364             # If no name is provided, then attributes() returns a flattened hash, of
365             # attribute=>value pairs.
366              
367             sub do_attributes{
368 20     20 1 27 my $self = shift;
369 20         24 my ($feature_id,$tag) = @_;
370 20         61 my $attr ;
371              
372             #my $feature = ${$self->{data}}[$feature_id];
373 20         46 my $feature = $self->_basic_features_by_id($feature_id);
374              
375 20         21 my @result;
376 20         24 for my $attr (@{$feature->{attributes}}) {
  20         35  
377 50         66 my ($attr_name,$attr_value) = @$attr ;
378 50 100 100     168 if (defined($tag) && lc($attr_name) eq lc($tag)){push @result,$attr_value;}
  20 100       33  
379 15         21 elsif (!defined($tag)) {push @result,($attr_name,$attr_value);}
380             }
381 20         83 return @result;
382             }
383              
384              
385             #sub get_feature_by_attribute{
386             sub _feature_by_attribute{
387 5     5   7 my $self = shift;
388 5         10 my ($attributes,$callback) = @_;
389 5 50       13 $callback || $self->throw('must provide a callback argument');
390 5         5 my $count = 0;
391 5         10 my $feature_id = -1;
392 5         7 my $feature_group_id = undef;
393              
394 5         5 for my $feature (@{$self->{data}}) {
  5         14  
395              
396 175         82 $feature_id++;
397 175         107 for my $attr (@{$feature->{attributes}}) {
  175         216  
398 65         85 my ($attr_name,$attr_value) = @$attr ;
399             #there could be more than one set of attributes......
400 65         74 foreach (keys %$attributes) {
401 65 100 100     206 if (lc($_) eq lc($attr_name) && lc($attributes->{$_}) eq lc($attr_value)) {
402 10         15 $callback->($self->_hash_to_array($feature));
403 10         25 $count++;
404             }
405             }
406             }
407             }
408              
409             }
410              
411              
412             # This is the low-level method that is called to retrieve GFF lines from
413             # the database. It is responsible for retrieving features that satisfy
414             # range and feature type criteria, and passing the GFF fields to a
415             # callback subroutine.
416              
417             sub get_features{
418 85     85 1 92 my $self = shift;
419 85         82 my $count = 0;
420 85         100 my ($search,$options,$callback) = @_;
421              
422 85         57 my $found_features;
423              
424 85         191 $found_features = $self->_get_features_by_search_options($search,$options);
425              
426             # only true if the sort by group option was specified
427 0         0 @{$found_features} = sort {lc("$a->{gclass}:$a->{gname}") cmp lc("$b->{gclass}:$b->{gname}")}
  0         0  
428 85 50       176 @{$found_features} if $options->{sort_by_group} ;
  0         0  
429              
430 85         97 for my $feature (@{$found_features}) { # only true if the sort by group option was specified
  85         120  
431 470         344 $count++;
432 470         696 $callback->(
433             $self->_hash_to_array($feature)
434             );
435             }
436              
437 85         161 return $count;
438             }
439              
440              
441             # Low level implementation of fetching a named feature.
442             # GFF annotations are named using the group class and name fields.
443             # May return zero, one, or several Bio::DB::GFF::Feature objects.
444              
445             =head2 _feature_by_name
446              
447             Title : _feature_by_name
448             Usage : $db->get_features_by_name($name,$class,$callback)
449             Function: get a list of features by name and class
450             Returns : count of number of features retrieved
451             Args : name of feature, class of feature, and a callback
452             Status : protected
453              
454             This method is used internally. The callback arguments are those used
455             by make_feature().
456              
457             =cut
458              
459             sub _feature_by_name {
460 16     16   18 my $self = shift;
461 16         21 my ($class,$name,$location,$callback) = @_;
462 16 50       26 $callback || $self->throw('must provide a callback argument');
463 16         18 my $count = 0;
464 16         24 my $regexp;
465              
466 16 50       47 if ($name =~ /[*?]/) { # uh oh regexp time
467 0         0 $name = quotemeta($name);
468 0         0 $name =~ s/\\\*/.*/g;
469 0         0 $name =~ s/\\\?/.?/g;
470 0         0 $regexp++;
471             }
472              
473 16         18 for my $feature (@{$self->{data}}) {
  16         41  
474 560 100 33     1788 next unless ($regexp && $feature->{gname} =~ /$name/i) || lc($feature->{gname}) eq lc($name);
      66        
475 74 50 33     354 next if defined($feature->{gclass}) && length($feature->{gclass}) > 0 && $feature->{gclass} ne $class;
      33        
476              
477 74 50       92 if ($location) {
478 0 0       0 next if $location->[0] ne $feature->{ref};
479 0 0 0     0 next if $location->[1] && $location->[1] > $feature->{stop};
480 0 0 0     0 next if $location->[2] && $location->[2] < $feature->{start};
481             }
482 74         60 $count++;
483 74         111 $callback->($self->_hash_to_array($feature),0);
484             }
485 16         29 return $count;
486             }
487              
488             # Low level implementation of fetching a feature by it's id.
489             # The id of the feature as implemented in the in-memory db, is the location of the
490             # feature in the features hash array.
491             sub _feature_by_id{
492 5     5   5 my $self = shift;
493 5         10 my ($ids,$type,$callback) = @_;
494 5 50       14 $callback || $self->throw('must provide a callback argument');
495              
496 5         8 my $feature_group_id = undef;
497              
498 5         7 my $count = 0;
499 5 50       15 if ($type eq 'feature'){
500 5         14 for my $feature_id (@$ids){
501 5         15 my $feature = $self->_basic_features_by_id($feature_id);
502 5 50       25 $callback->($self->_hash_to_array($feature)) if $callback;
503 5         15 $count++;
504             }
505             }
506             }
507              
508             sub _basic_features_by_id{
509 25     25   26 my $self = shift;
510 25         27 my ($ids) = @_;
511            
512 25 50       90 $ids = [$ids] unless ref $ids =~ /ARRAY/;
513              
514 25         30 my @result;
515 25         37 for my $feature_id (@$ids){
516 25         25 push @result, ${$self->{data}}[$feature_id];
  25         63  
517             }
518 25 50       70 return wantarray() ? @result : $result[0];
519             }
520              
521             # This method is similar to get_features(), except that it returns an
522             # iterator across the query.
523             # See Bio::DB::GFF::Adaptor::memory::iterator.
524              
525             sub get_features_iterator {
526 15     15 1 21 my $self = shift;
527 15         22 my ($search,$options,$callback) = @_;
528 15 50       24 $callback || $self->throw('must provide a callback argument');
529              
530 15         29 my $results = $self->_get_features_by_search_options($search,$options);
531 15         30 my $results_array = $self->_convert_feature_hash_to_array($results);
532              
533 15         96 return Bio::DB::GFF::Adaptor::memory::iterator->new($results_array,$callback);
534             }
535              
536              
537             # This method is responsible for fetching the list of feature type names.
538             # The query may be limited to a particular range, in
539             # which case the range is indicated by a landmark sequence name and
540             # class and its subrange, if any. These arguments may be undef if it is
541             # desired to retrieve all feature types.
542              
543             # If the count flag is false, the method returns a simple list of
544             # Bio::DB::GFF::Typename objects. If $count is true, the method returns
545             # a list of $name=>$count pairs, where $count indicates the number of
546             # times this feature occurs in the range.
547              
548             sub get_types {
549 25     25 1 29 my $self = shift;
550 25         35 my ($srcseq,$class,$start,$stop,$want_count,$typelist) = @_;
551              
552 25         25 my(%result,%obj);
553              
554 25         31 for my $feature (@{$self->{data}}) {
  25         72  
555 875         830 my $feature_start = $feature->{start};
556 875         598 my $feature_stop = $feature->{stop};
557 875         675 my $feature_ref = $feature->{ref};
558 875         623 my $feature_class = $feature->{class};
559 875         594 my $feature_method = $feature->{method};
560 875         630 my $feature_source = $feature->{source};
561              
562 875 100       1065 if (defined $srcseq){
563 525 100       772 next unless lc($feature_ref) eq lc($srcseq);
564             }
565              
566 605 50       675 if (defined $class){
567 0 0 0     0 next unless defined $feature_class && $feature_class eq $class ;
568             }
569              
570             # the requested range should OVERLAP the retrieved features
571 605 100 66     1282 if (defined $start or defined $stop) {
572 255 50       288 $start = 1 unless defined $start;
573 255 50       294 $stop = MAX_SEGMENT unless defined $stop;
574 255 50 33     659 next unless $feature_stop >= $start && $feature_start <= $stop;
575             }
576              
577 605 50 33     755 if (defined $typelist && @$typelist){
578 0 0       0 next unless $self->_matching_typelist($feature_method,$feature_source,$typelist);
579             }
580              
581 605         809 my $type = Bio::DB::GFF::Typename->new($feature_method,$feature_source);
582 605         737 $result{$type}++;
583 605         633 $obj{$type} = $type;
584              
585             } #end features loop
586              
587 25 100       228 return $want_count ? %result : values %obj;
588             }
589              
590             sub classes {
591 0     0 1 0 my $self = shift;
592 0         0 my %classes;
593 0         0 for my $feature (@{$self->{data}}) {
  0         0  
594 0         0 $classes{$feature->{gclass}}++;
595             }
596 0         0 my @classes = sort keys %classes;
597 0         0 return @classes;
598             }
599              
600             # Internal method that performs a search on the features array,
601             # sequentialy retrieves the features, and performs a check on each feature
602             # according to the search options.
603             sub _get_features_by_search_options{
604 100     100   94 my $count = 0;
605 100         94 my ($self, $search,$options) = @_;
606             my ($rangetype,$refseq,$class,$start,$stop,$types,$sparse,$order_by_group,$attributes) =
607 100         169 (@{$search}{qw(rangetype refseq refclass start stop types)},
608 100         121 @{$options}{qw(sparse sort_by_group ATTRIBUTES)}) ;
  100         166  
609              
610 100         77 my @found_features;
611 100         120 my $data = $self->{data};
612              
613 100         77 my $feature_id = -1 ;
614 100         77 my $feature_group_id = undef;
615              
616 100         80 for my $feature (@{$data}) {
  100         152  
617              
618 3270         1772 $feature_id++;
619              
620 3270         2568 my $feature_start = $feature->{start};
621 3270         2164 my $feature_stop = $feature->{stop};
622 3270         2300 my $feature_ref = $feature->{ref};
623              
624 3270 100       3555 if (defined $refseq){
625 2905 100       3836 next unless lc($feature_ref) eq lc($refseq);
626             }
627              
628 1330 100 66     2649 if (defined $start or defined $stop) {
629 455 50       493 $start = 0 unless defined($start);
630 455 50       481 $stop = MAX_SEGMENT unless defined($stop);
631              
632 455 100       501 if ($rangetype eq 'overlaps') {
    50          
    0          
633 385 100 100     1047 next unless $feature_stop >= $start && $feature_start <= $stop;
634             } elsif ($rangetype eq 'contains') {
635 70 100 66     188 next unless $feature_start >= $start && $feature_stop <= $stop;
636             } elsif ($rangetype eq 'contained_in') {
637 0 0 0     0 next unless $feature_start <= $start && $feature_stop >= $stop;
638             } else {
639 0 0 0     0 next unless $feature_start == $start && $feature_stop == $stop;
640             }
641              
642             }
643              
644 1210         928 my $feature_source = $feature->{source};
645 1210         893 my $feature_method = $feature->{method};
646              
647 1210 100 66     2845 if (defined $types && @$types){
648 375 100       427 next unless $self->_matching_typelist($feature_method,$feature_source,$types);
649             }
650              
651 970         713 my $feature_attributes = $feature->{attributes};
652 970 100       1044 if (defined $attributes){
653 255 100       257 next unless $self->_matching_attributes($feature_attributes,$attributes);
654             }
655              
656             # if we get here, then we have a feature that meets the criteria.
657             # Then we just push onto an array
658             # of found features and continue.
659              
660 740         456 my $found_feature = $feature ;
661 740         567 $found_feature->{feature_id} = $feature_id;
662 740         659 $found_feature->{group_id} = $feature_group_id;
663 740         816 push @found_features,$found_feature;
664             }
665              
666 100         201 return \@found_features;
667             }
668              
669              
670             sub _hash_to_array {
671 829     829   666 my ($self,$feature_hash) = @_;
672 829         652 my @array = @{$feature_hash}{@hash2array_map};
  829         3588  
673 829 100       2668 return wantarray ? @array : \@array;
674             }
675              
676             # this subroutine is needed for convertion of the feature from hash to array in order to
677             # pass it to the callback subroutine
678             sub _convert_feature_hash_to_array{
679 15     15   18 my ($self, $feature_hash_array) = @_;
680 15         25 my @features_array_array = map {scalar $self->_hash_to_array($_)} @$feature_hash_array;
  270         271  
681 15         26 return \@features_array_array;
682             }
683              
684             sub _matching_typelist{
685 375     375   327 my ($self, $feature_method,$feature_source,$typelist) = @_;
686 375         377 foreach (@$typelist) {
687 1090         1001 my ($search_method,$search_source) = @$_;
688 1090 100       1537 next if lc($search_method) ne lc($feature_method);
689 135 50 66     239 next if defined($search_source) && lc($search_source) ne lc($feature_source);
690 135         247 return 1;
691             }
692 240         420 return 0;
693             }
694              
695             sub _matching_attributes {
696 6425     6425   4638 my ($self, $feature_attributes,$attributes) = @_ ;
697 6425         8326 foreach (keys %$attributes) {
698 6443 100       6211 return 0 if !_match_all_attr_in_feature($_,$attributes->{$_},$feature_attributes)
699             }
700 30         52 return 1;
701             }
702              
703             sub _match_all_attr_in_feature{
704 6443     6443   4735 my ($attr_name,$attr_value,$feature_attributes) = @_;
705 6443         5968 for my $attr (@$feature_attributes) {
706 2411         2657 my ($feature_attr_name,$feature_attr_value) = @$attr ;
707 2411 100 100     4746 next if ($attr_name ne $feature_attr_name || $attr_value ne $feature_attr_value);
708 48         97 return 1;
709             }
710 6395         14008 return 0;
711             }
712              
713              
714 5     5 1 15 sub do_initialize { 1; }
715 0     0 0   sub get_feature_by_group_id{ 1; }
716              
717             1;
718