File Coverage

Bio/DB/GFF/Adaptor/memory.pm
Criterion Covered Total %
statement 305 384 79.4
branch 104 170 61.1
condition 57 121 47.1
subroutine 32 36 88.8
pod 13 17 76.4
total 511 728 70.1


line stmt bran cond sub pod time code
1             package Bio::DB::GFF::Adaptor::memory;
2              
3             =head1 NAME
4              
5             Bio::DB::GFF::Adaptor::memory -- Bio::DB::GFF database adaptor for in-memory databases
6              
7             =head1 SYNOPSIS
8              
9             use Bio::DB::GFF;
10             my $db = Bio::DB::GFF->new(-adaptor=> 'memory',
11             -gff => 'my_features.gff',
12             -fasta => 'my_dna.fa'
13             );
14              
15             or
16              
17             my $db = Bio::DB::GFF->new(-adaptor=>'memory');
18             $db->load_gff_file('my_features.gff');
19             $db->load_fasta_file('my_dna.fa');
20              
21             See L for other methods.
22              
23             =head1 DESCRIPTION
24              
25             This adaptor implements an in-memory version of Bio::DB::GFF. It can be used to
26             store and retrieve SHORT GFF files. It inherits from Bio::DB::GFF.
27              
28             =head1 CONSTRUCTOR
29              
30             Use Bio::DB::GFF-Enew() to construct new instances of this class.
31             Three named arguments are recommended:
32              
33             Argument Description
34              
35             -adaptor Set to "memory" to create an instance of this class.
36             -gff Read the indicated file or directory of .gff file.
37             -fasta Read the indicated file or directory of fasta files.
38             -dir Indicates a directory containing .gff and .fa files
39              
40             If you use the -dir option and the indicated directory is writable by
41             the current process, then this library will create a FASTA file index
42             that greatly diminishes the memory usage of this module.
43              
44             Alternatively you may create an empty in-memory object using just the
45             -adaptor=E'memory' argument and then call the load_gff_file() and
46             load_fasta_file() methods to load GFF and/or sequence
47             information. This is recommended in CGI/mod_perl/fastCGI environments
48             because these methods do not modify STDIN, unlike the constructor.
49              
50             =head1 METHODS
51              
52             See L for inherited methods.
53              
54             =head1 BUGS
55              
56             none ;-)
57              
58             =head1 SEE ALSO
59              
60             L, L
61              
62             =head1 AUTHOR
63              
64             Shuly Avraham Eavraham@cshl.orgE.
65              
66             Copyright (c) 2002 Cold Spring Harbor Laboratory.
67              
68             This library is free software; you can redistribute it and/or modify
69             it under the same terms as Perl itself.
70              
71             =cut
72              
73 3     3   21 use strict;
  3         9  
  3         99  
74             # AUTHOR: Shulamit Avraham
75             # This module needs to be cleaned up and documented
76              
77             # Bio::DB::GFF::Adaptor::memory -- in-memory db adaptor
78             # implements the low level handling of data which stored in memory.
79             # This adaptor implements a specific in memory schema that is compatible with Bio::DB::GFF.
80             # Inherits from Bio::DB::GFF.
81              
82              
83 3     3   15 use Bio::DB::GFF::Util::Rearrange; # for rearrange()
  3         6  
  3         165  
84 3     3   1452 use Bio::DB::GFF::Adaptor::memory::iterator;
  3         6  
  3         93  
85 3     3   15 use File::Basename 'dirname';
  3         6  
  3         213  
86 3     3   1461 use Bio::DB::GFF::Adaptor::memory::feature_serializer qw(@hash2array_map);
  3         9  
  3         369  
87              
88              
89 3     3   15 use constant MAX_SEGMENT => 1_000_000_000; # the largest a segment can get
  3         3  
  3         132  
90              
91 3     3   15 use base qw(Bio::DB::GFF);
  3         3  
  3         9474  
92              
93             sub new {
94 5     5 1 15 my $class = shift ;
95 5         43 my ($file,$fasta,$dbdir,$preferred_groups) = rearrange([
96             [qw(GFF FILE)],
97             'FASTA',
98             [qw(DSN DB DIR DIRECTORY)],
99             'PREFERRED_GROUPS',
100             ],@_);
101              
102             # fill in object
103 5         27 my $self = bless{ data => [] },$class;
104 5 50       18 $self->preferred_groups($preferred_groups) if defined $preferred_groups;
105 5   33     29 $file ||= $dbdir;
106 5   33     28 $fasta ||= $dbdir;
107 5 50       12 $self->load_gff($file) if $file;
108 5 50       27 $self->load_or_store_fasta($fasta) if $fasta;
109 5         17 return $self;
110             }
111              
112             sub load_or_store_fasta {
113 0     0 0 0 my $self = shift;
114 0         0 my $fasta = shift;
115 0 0 0     0 if ((-f $fasta && -w dirname($fasta))
      0        
      0        
116             or
117             (-d $fasta && -w $fasta)) {
118 0         0 require Bio::DB::Fasta;
119 0 0       0 my $dna_db = eval {Bio::DB::Fasta->new($fasta);}
  0         0  
120             or warn "$@\nCan't open sequence file(s). Use -gff instead of -dir if you wish to load features without sequence.\n";
121 0 0       0 $dna_db && $self->dna_db($dna_db);
122             } else {
123 0         0 $self->load_fasta($fasta);
124             }
125             }
126              
127             sub dna_db {
128 120     120 0 152 my $self = shift;
129 120         176 my $d = $self->{dna_db};
130 120 50       275 $self->{dna_db} = shift if @_;
131 120         236 $d;
132             }
133              
134             sub insert_sequence {
135 2620     2620 0 3320 my $self = shift;
136 2620         4117 my($id,$offset,$seq) = @_;
137 2620         7867 $self->{dna}{$id} .= $seq;
138             }
139              
140             # low-level fetch of a DNA substring given its
141             # name, class and the desired range.
142             sub get_dna {
143 120     120 1 199 my $self = shift;
144 120         204 my ($id,$start,$stop,$class) = @_;
145 120 50       229 if (my $dna_db = $self->dna_db) {
146 0         0 return $dna_db->seq($id,$start=>$stop);
147             }
148 120 50       265 return '' unless $self->{dna};
149              
150 120 100 66     291 return $self->{dna}{$id} unless defined $start || defined $stop;
151 115 50       217 $start = 1 if !defined $start;
152              
153 115         143 my $reversed = 0;
154 115 100       198 if ($start > $stop) {
155 55         70 $reversed++;
156 55         115 ($start,$stop) = ($stop,$start);
157             }
158 115         713 my $dna = substr($self->{dna}{$id},$start-1,$stop-$start+1);
159 115 100       218 if ($reversed) {
160 55         147 $dna =~ tr/gatcGATC/ctagCTAG/;
161 55         104 $dna = reverse $dna;
162             }
163              
164 115         462 $dna;
165             }
166              
167             sub setup_load {
168 5     5 1 13 my $self = shift;
169 5         15 $self->{tmp} = {};
170 5         16 $self->{data} = [];
171 5         16 1;
172             }
173              
174             sub finish_load {
175 5     5 1 10 my $self = shift;
176 5         10 my $idx = 0;
177 5         5 foreach my $arrayref (values %{$self->{tmp}}) {
  5         28  
178 92         115 foreach (@$arrayref) {$_->{feature_id} = $idx++; }
  175         218  
179 92         94 push @{$self->{data}},@$arrayref;
  92         147  
180             }
181 5         10 1;
182             }
183              
184             # this method loads the feature as a hash into memory -
185             # keeps an array of features-hashes as an in-memory db
186             sub load_gff_line {
187 175     175 1 261 my $self = shift;
188 175         209 my $feature_hash = shift;
189 175 50 66     530 $feature_hash->{strand} = '' if $feature_hash->{strand} && $feature_hash->{strand} eq '.';
190 175 50 66     349 $feature_hash->{phase} = '' if $feature_hash->{phase} && $feature_hash->{phase} eq '.';
191 175 50       337 $feature_hash->{gclass} = 'Sequence' unless length $feature_hash->{gclass} > 0;
192             # sort by group please
193 175         204 push @{$self->{tmp}{$feature_hash->{gclass},$feature_hash->{gname}}},$feature_hash;
  175         675  
194             }
195              
196             # given sequence name, return (reference,start,stop,strand)
197             sub get_abscoords {
198 193     193 1 250 my $self = shift;
199 193         330 my ($name,$class,$refseq) = @_;
200 193         250 my %refs;
201             my $regexp;
202            
203 193 50       530 if ($name =~ /[*?]/) { # uh oh regexp time
204 0         0 $name = quotemeta($name);
205 0         0 $name =~ s/\\\*/.*/g;
206 0         0 $name =~ s/\\\?/.?/g;
207 0         0 $regexp++;
208             }
209              
210             # Find all features that have the requested name and class.
211             # Sort them by reference point.
212 193         257 for my $feature (@{$self->{data}}) {
  193         410  
213              
214 6560         7467 my $no_match_class_name;
215             my $empty_class_name;
216             my $class_matches = !defined($feature->{gclass}) ||
217             length($feature->{gclass}) == 0 ||
218 6560   66     20674 $feature->{gclass} eq $class;
219              
220 6560 50       9613 if (defined $feature->{gname}) {
221             my $matches = $class_matches
222 6560   100     10967 && ($regexp ? $feature->{gname} =~ /$name/i : lc($feature->{gname}) eq lc($name));
223 6560         7710 $no_match_class_name = !$matches; # to accomodate Shuly's interesting logic
224             }
225              
226             else{
227 0         0 $empty_class_name = 1;
228             }
229              
230 6560 100       8771 if ($no_match_class_name){
231 6170         6974 my $feature_attributes = $feature->{attributes};
232 6170         9501 my $attributes = {Alias => $name};
233 6170 100       8782 if (!$self->_matching_attributes($feature_attributes,$attributes)){
234 6165         11113 next;
235             }
236             }
237              
238 395         414 push @{$refs{$feature->{ref}}},$feature;
  395         908  
239             }
240              
241             # find out how many reference points we recovered
242 193 100       333 if (! %refs) {
243 8         54 $self->error("$name not found in database");
244 8         43 return;
245             }
246              
247             # compute min and max
248 185         322 my ($ref) = keys %refs;
249 185         216 my @found = @{$refs{$ref}};
  185         344  
250 185         378 my ($strand,$start,$stop);
251              
252 185         0 my @found_segments;
253 185         260 foreach my $ref (keys %refs) {
254 185 50 33     356 next if defined($refseq) and lc($ref) ne lc($refseq);
255 185         252 my @found = @{$refs{$ref}};
  185         308  
256 185         249 my ($strand,$start,$stop,$name);
257 185         260 foreach (@found) {
258 395   100     929 $strand ||= $_->{strand};
259 395 50 66     820 $strand = '+' if $strand && $strand eq '.';
260 395 100 66     879 $start = $_->{start} if !defined($start) || $start > $_->{start};
261 395 100 66     789 $stop = $_->{stop} if !defined($stop) || $stop < $_->{stop};
262 395   66     752 $name ||= $_->{gname};
263             }
264 185         666 push @found_segments,[$ref,$class,$start,$stop,$strand,$name];
265              
266             }
267              
268 185         831 return \@found_segments;
269             }
270              
271             sub search_notes {
272 0     0 1 0 my $self = shift;
273 0         0 my ($search_string,$limit) = @_;
274              
275 0         0 $search_string =~ tr/*?//d;
276              
277 0         0 my @results;
278 0         0 my @words = map {quotemeta($_)} $search_string =~ /(\w+)/g;
  0         0  
279 0         0 my $search = join '|',@words;
280              
281 0         0 for my $feature (@{$self->{data}}) {
  0         0  
282 0 0 0     0 next unless defined $feature->{gclass} && defined $feature->{gname}; # ignore NULL objects
283 0 0       0 next unless $feature->{attributes};
284 0         0 my @attributes = @{$feature->{attributes}};
  0         0  
285 0         0 my @values = map {$_->[1]} @attributes;
  0         0  
286 0         0 my $value = "@values";
287 0         0 my $matches = 0;
288 0         0 for my $w (@words) {
289 0         0 my @hits = $value =~ /($w)/ig;
290 0         0 $matches += @hits;
291             }
292 0 0       0 next unless $matches;
293              
294 0         0 my $relevance = 10 * $matches;
295 0         0 my $featname = Bio::DB::GFF::Featname->new($feature->{gclass}=>$feature->{gname});
296 0         0 my $note;
297 0         0 $note = join ' ',map {$_->[1]} grep {$_->[0] eq 'Note'} @{$feature->{attributes}};
  0         0  
  0         0  
  0         0  
298 0         0 $note .= join ' ',grep /$search/,map {$_->[1]} grep {$_->[0] ne 'Note'} @{$feature->{attributes}};
  0         0  
  0         0  
  0         0  
299 0         0 my $type = Bio::DB::GFF::Typename->new($feature->{method},$feature->{source});
300 0         0 push @results,[$featname,$note,$relevance,$type];
301 0 0 0     0 last if defined $limit && @results >= $limit;
302             }
303              
304             #added result filtering so that this method returns the expected results
305             #this section of code used to be in GBrowse's do_keyword_search method
306              
307 0         0 my $match_sub = 'sub {';
308 0         0 foreach (split /\s+/,$search_string) {
309 0         0 $match_sub .= "return unless \$_[0] =~ /\Q$_\E/i; ";
310             }
311 0         0 $match_sub .= "};";
312 0         0 my $match = eval $match_sub;
313              
314 0         0 my @matches = grep { $match->($_->[1]) } @results;
  0         0  
315              
316 0         0 return @matches;
317             }
318              
319             sub _delete_features {
320 20     20   37 my $self = shift;
321 20         77 my @feature_ids = sort {$b<=>$a} @_;
  85         147  
322 20         31 my $removed = 0;
323 20         37 foreach (@feature_ids) {
324 75 50 33     134 next unless $_ >= 0 && $_ < @{$self->{data}};
  75         174  
325 75         90 $removed += defined splice(@{$self->{data}},$_,1);
  75         138  
326             }
327 20         75 $removed;
328             }
329              
330             sub _delete {
331 21     21   37 my $self = shift;
332 21         28 my $delete_spec = shift;
333 21   50     52 my $ranges = $delete_spec->{segments} || [];
334 21   50     51 my $types = $delete_spec->{types} || [];
335 21         31 my $force = $delete_spec->{force};
336 21         30 my $range_type = $delete_spec->{range_type};
337              
338 21         31 my $deleted = 0;
339 21 100       63 if (@$ranges) {
    100          
340 10 100       23 my @args = @$types ? (-type=>$types) : ();
341 10         23 push @args,(-range_type => $range_type);
342 10         23 my %ids_to_remove = map {$_->id => 1} map {$_->features(@args)} @$ranges;
  40         85  
  10         23  
343 10         38 $deleted = $self->delete_features(keys %ids_to_remove);
344             } elsif (@$types) {
345 5         18 my %ids_to_remove = map {$_->id => 1} $self->features(-type=>$types);
  20         36  
346 5         18 $deleted = $self->delete_features(keys %ids_to_remove);
347             } else {
348 6 100       84 $self->throw("This operation would delete all feature data and -force not specified")
349             unless $force;
350 3         7 $deleted = @{$self->{data}};
  3         16  
351 3         9 @{$self->{data}} = ();
  3         43  
352             }
353 18         118 $deleted;
354             }
355              
356             # attributes -
357              
358             # Some GFF version 2 files use the groups column to store a series of
359             # attribute/value pairs. In this interpretation of GFF, the first such
360             # pair is treated as the primary group for the feature; subsequent pairs
361             # are treated as attributes. Two attributes have special meaning:
362             # "Note" is for backward compatibility and is used for unstructured text
363             # remarks. "Alias" is considered as a synonym for the feature name.
364             # If no name is provided, then attributes() returns a flattened hash, of
365             # attribute=>value pairs.
366              
367             sub do_attributes{
368 20     20 1 28 my $self = shift;
369 20         46 my ($feature_id,$tag) = @_;
370 20         23 my $attr ;
371              
372             #my $feature = ${$self->{data}}[$feature_id];
373 20         43 my $feature = $self->_basic_features_by_id($feature_id);
374              
375 20         29 my @result;
376 20         27 for my $attr (@{$feature->{attributes}}) {
  20         41  
377 50         88 my ($attr_name,$attr_value) = @$attr ;
378 50 100 100     163 if (defined($tag) && lc($attr_name) eq lc($tag)){push @result,$attr_value;}
  20 100       34  
379 15         28 elsif (!defined($tag)) {push @result,($attr_name,$attr_value);}
380             }
381 20         72 return @result;
382             }
383              
384              
385             #sub get_feature_by_attribute{
386             sub _feature_by_attribute{
387 5     5   10 my $self = shift;
388 5         10 my ($attributes,$callback) = @_;
389 5 50       13 $callback || $self->throw('must provide a callback argument');
390 5         8 my $count = 0;
391 5         10 my $feature_id = -1;
392 5         5 my $feature_group_id = undef;
393              
394 5         8 for my $feature (@{$self->{data}}) {
  5         16  
395              
396 175         176 $feature_id++;
397 175         177 for my $attr (@{$feature->{attributes}}) {
  175         263  
398 65         90 my ($attr_name,$attr_value) = @$attr ;
399             #there could be more than one set of attributes......
400 65         105 foreach (keys %$attributes) {
401 65 100 100     201 if (lc($_) eq lc($attr_name) && lc($attributes->{$_}) eq lc($attr_value)) {
402 10         25 $callback->($self->_hash_to_array($feature));
403 10         30 $count++;
404             }
405             }
406             }
407             }
408              
409             }
410              
411              
412             # This is the low-level method that is called to retrieve GFF lines from
413             # the database. It is responsible for retrieving features that satisfy
414             # range and feature type criteria, and passing the GFF fields to a
415             # callback subroutine.
416              
417             sub get_features{
418 85     85 1 160 my $self = shift;
419 85         118 my $count = 0;
420 85         176 my ($search,$options,$callback) = @_;
421              
422 85         102 my $found_features;
423              
424 85         201 $found_features = $self->_get_features_by_search_options($search,$options);
425              
426             # only true if the sort by group option was specified
427 0         0 @{$found_features} = sort {lc("$a->{gclass}:$a->{gname}") cmp lc("$b->{gclass}:$b->{gname}")}
  0         0  
428 85 50       168 @{$found_features} if $options->{sort_by_group} ;
  0         0  
429              
430 85         94 for my $feature (@{$found_features}) { # only true if the sort by group option was specified
  85         151  
431 470         564 $count++;
432 470         795 $callback->(
433             $self->_hash_to_array($feature)
434             );
435             }
436              
437 85         209 return $count;
438             }
439              
440              
441             # Low level implementation of fetching a named feature.
442             # GFF annotations are named using the group class and name fields.
443             # May return zero, one, or several Bio::DB::GFF::Feature objects.
444              
445             =head2 _feature_by_name
446              
447             Title : _feature_by_name
448             Usage : $db->get_features_by_name($name,$class,$callback)
449             Function: get a list of features by name and class
450             Returns : count of number of features retrieved
451             Args : name of feature, class of feature, and a callback
452             Status : protected
453              
454             This method is used internally. The callback arguments are those used
455             by make_feature().
456              
457             =cut
458              
459             sub _feature_by_name {
460 16     16   24 my $self = shift;
461 16         34 my ($class,$name,$location,$callback) = @_;
462 16 50       32 $callback || $self->throw('must provide a callback argument');
463 16         24 my $count = 0;
464 16         29 my $regexp;
465              
466 16 50       57 if ($name =~ /[*?]/) { # uh oh regexp time
467 0         0 $name = quotemeta($name);
468 0         0 $name =~ s/\\\*/.*/g;
469 0         0 $name =~ s/\\\?/.?/g;
470 0         0 $regexp++;
471             }
472              
473 16         22 for my $feature (@{$self->{data}}) {
  16         39  
474 560 100 33     1563 next unless ($regexp && $feature->{gname} =~ /$name/i) || lc($feature->{gname}) eq lc($name);
      66        
475 74 50 33     348 next if defined($feature->{gclass}) && length($feature->{gclass}) > 0 && $feature->{gclass} ne $class;
      33        
476              
477 74 50       119 if ($location) {
478 0 0       0 next if $location->[0] ne $feature->{ref};
479 0 0 0     0 next if $location->[1] && $location->[1] > $feature->{stop};
480 0 0 0     0 next if $location->[2] && $location->[2] < $feature->{start};
481             }
482 74         79 $count++;
483 74         130 $callback->($self->_hash_to_array($feature),0);
484             }
485 16         32 return $count;
486             }
487              
488             # Low level implementation of fetching a feature by it's id.
489             # The id of the feature as implemented in the in-memory db, is the location of the
490             # feature in the features hash array.
491             sub _feature_by_id{
492 5     5   10 my $self = shift;
493 5         15 my ($ids,$type,$callback) = @_;
494 5 50       15 $callback || $self->throw('must provide a callback argument');
495              
496 5         10 my $feature_group_id = undef;
497              
498 5         5 my $count = 0;
499 5 50       15 if ($type eq 'feature'){
500 5         17 for my $feature_id (@$ids){
501 5         18 my $feature = $self->_basic_features_by_id($feature_id);
502 5 50       18 $callback->($self->_hash_to_array($feature)) if $callback;
503 5         20 $count++;
504             }
505             }
506             }
507              
508             sub _basic_features_by_id{
509 25     25   47 my $self = shift;
510 25         40 my ($ids) = @_;
511            
512 25 50       86 $ids = [$ids] unless ref $ids =~ /ARRAY/;
513              
514 25         39 my @result;
515 25         48 for my $feature_id (@$ids){
516 25         32 push @result, ${$self->{data}}[$feature_id];
  25         68  
517             }
518 25 50       65 return wantarray() ? @result : $result[0];
519             }
520              
521             # This method is similar to get_features(), except that it returns an
522             # iterator across the query.
523             # See Bio::DB::GFF::Adaptor::memory::iterator.
524              
525             sub get_features_iterator {
526 15     15 1 30 my $self = shift;
527 15         29 my ($search,$options,$callback) = @_;
528 15 50       24 $callback || $self->throw('must provide a callback argument');
529              
530 15         35 my $results = $self->_get_features_by_search_options($search,$options);
531 15         33 my $results_array = $self->_convert_feature_hash_to_array($results);
532              
533 15         94 return Bio::DB::GFF::Adaptor::memory::iterator->new($results_array,$callback);
534             }
535              
536              
537             # This method is responsible for fetching the list of feature type names.
538             # The query may be limited to a particular range, in
539             # which case the range is indicated by a landmark sequence name and
540             # class and its subrange, if any. These arguments may be undef if it is
541             # desired to retrieve all feature types.
542              
543             # If the count flag is false, the method returns a simple list of
544             # Bio::DB::GFF::Typename objects. If $count is true, the method returns
545             # a list of $name=>$count pairs, where $count indicates the number of
546             # times this feature occurs in the range.
547              
548             sub get_types {
549 25     25 1 50 my $self = shift;
550 25         55 my ($srcseq,$class,$start,$stop,$want_count,$typelist) = @_;
551              
552 25         47 my(%result,%obj);
553              
554 25         37 for my $feature (@{$self->{data}}) {
  25         62  
555 875         1281 my $feature_start = $feature->{start};
556 875         1044 my $feature_stop = $feature->{stop};
557 875         1039 my $feature_ref = $feature->{ref};
558 875         1013 my $feature_class = $feature->{class};
559 875         1002 my $feature_method = $feature->{method};
560 875         1036 my $feature_source = $feature->{source};
561              
562 875 100       1190 if (defined $srcseq){
563 525 100       901 next unless lc($feature_ref) eq lc($srcseq);
564             }
565              
566 605 50       807 if (defined $class){
567 0 0 0     0 next unless defined $feature_class && $feature_class eq $class ;
568             }
569              
570             # the requested range should OVERLAP the retrieved features
571 605 100 66     1227 if (defined $start or defined $stop) {
572 255 50       341 $start = 1 unless defined $start;
573 255 50       338 $stop = MAX_SEGMENT unless defined $stop;
574 255 50 33     603 next unless $feature_stop >= $start && $feature_start <= $stop;
575             }
576              
577 605 50 33     955 if (defined $typelist && @$typelist){
578 0 0       0 next unless $self->_matching_typelist($feature_method,$feature_source,$typelist);
579             }
580              
581 605         1034 my $type = Bio::DB::GFF::Typename->new($feature_method,$feature_source);
582 605         960 $result{$type}++;
583 605         940 $obj{$type} = $type;
584              
585             } #end features loop
586              
587 25 100       221 return $want_count ? %result : values %obj;
588             }
589              
590             sub classes {
591 0     0 1 0 my $self = shift;
592 0         0 my %classes;
593 0         0 for my $feature (@{$self->{data}}) {
  0         0  
594 0         0 $classes{$feature->{gclass}}++;
595             }
596 0         0 my @classes = sort keys %classes;
597 0         0 return @classes;
598             }
599              
600             # Internal method that performs a search on the features array,
601             # sequentialy retrieves the features, and performs a check on each feature
602             # according to the search options.
603             sub _get_features_by_search_options{
604 100     100   125 my $count = 0;
605 100         170 my ($self, $search,$options) = @_;
606             my ($rangetype,$refseq,$class,$start,$stop,$types,$sparse,$order_by_group,$attributes) =
607 100         177 (@{$search}{qw(rangetype refseq refclass start stop types)},
608 100         145 @{$options}{qw(sparse sort_by_group ATTRIBUTES)}) ;
  100         261  
609              
610 100         127 my @found_features;
611 100         165 my $data = $self->{data};
612              
613 100         121 my $feature_id = -1 ;
614 100         126 my $feature_group_id = undef;
615              
616 100         113 for my $feature (@{$data}) {
  100         158  
617              
618 3270         3344 $feature_id++;
619              
620 3270         4063 my $feature_start = $feature->{start};
621 3270         3622 my $feature_stop = $feature->{stop};
622 3270         3898 my $feature_ref = $feature->{ref};
623              
624 3270 100       4422 if (defined $refseq){
625 2905 100       4671 next unless lc($feature_ref) eq lc($refseq);
626             }
627              
628 1330 100 66     2674 if (defined $start or defined $stop) {
629 455 50       601 $start = 0 unless defined($start);
630 455 50       633 $stop = MAX_SEGMENT unless defined($stop);
631              
632 455 100       624 if ($rangetype eq 'overlaps') {
    50          
    0          
633 385 100 100     987 next unless $feature_stop >= $start && $feature_start <= $stop;
634             } elsif ($rangetype eq 'contains') {
635 70 100 66     176 next unless $feature_start >= $start && $feature_stop <= $stop;
636             } elsif ($rangetype eq 'contained_in') {
637 0 0 0     0 next unless $feature_start <= $start && $feature_stop >= $stop;
638             } else {
639 0 0 0     0 next unless $feature_start == $start && $feature_stop == $stop;
640             }
641              
642             }
643              
644 1210         1521 my $feature_source = $feature->{source};
645 1210         1374 my $feature_method = $feature->{method};
646              
647 1210 100 66     2783 if (defined $types && @$types){
648 375 100       559 next unless $self->_matching_typelist($feature_method,$feature_source,$types);
649             }
650              
651 970         1269 my $feature_attributes = $feature->{attributes};
652 970 100       1358 if (defined $attributes){
653 255 100       362 next unless $self->_matching_attributes($feature_attributes,$attributes);
654             }
655              
656             # if we get here, then we have a feature that meets the criteria.
657             # Then we just push onto an array
658             # of found features and continue.
659              
660 740         840 my $found_feature = $feature ;
661 740         852 $found_feature->{feature_id} = $feature_id;
662 740         999 $found_feature->{group_id} = $feature_group_id;
663 740         1244 push @found_features,$found_feature;
664             }
665              
666 100         252 return \@found_features;
667             }
668              
669              
670             sub _hash_to_array {
671 829     829   1197 my ($self,$feature_hash) = @_;
672 829         1026 my @array = @{$feature_hash}{@hash2array_map};
  829         3963  
673 829 100       3176 return wantarray ? @array : \@array;
674             }
675              
676             # this subroutine is needed for convertion of the feature from hash to array in order to
677             # pass it to the callback subroutine
678             sub _convert_feature_hash_to_array{
679 15     15   33 my ($self, $feature_hash_array) = @_;
680 15         24 my @features_array_array = map {scalar $self->_hash_to_array($_)} @$feature_hash_array;
  270         360  
681 15         29 return \@features_array_array;
682             }
683              
684             sub _matching_typelist{
685 375     375   564 my ($self, $feature_method,$feature_source,$typelist) = @_;
686 375         464 foreach (@$typelist) {
687 1090         1329 my ($search_method,$search_source) = @$_;
688 1090 100       1728 next if lc($search_method) ne lc($feature_method);
689 135 50 66     242 next if defined($search_source) && lc($search_source) ne lc($feature_source);
690 135         264 return 1;
691             }
692 240         473 return 0;
693             }
694              
695             sub _matching_attributes {
696 6425     6425   9072 my ($self, $feature_attributes,$attributes) = @_ ;
697 6425         10640 foreach (keys %$attributes) {
698 6442 100       9730 return 0 if !_match_all_attr_in_feature($_,$attributes->{$_},$feature_attributes)
699             }
700 30         71 return 1;
701             }
702              
703             sub _match_all_attr_in_feature{
704 6442     6442   8282 my ($attr_name,$attr_value,$feature_attributes) = @_;
705 6442         7829 for my $attr (@$feature_attributes) {
706 2409         3423 my ($feature_attr_name,$feature_attr_value) = @$attr ;
707 2409 100 100     4394 next if ($attr_name ne $feature_attr_name || $attr_value ne $feature_attr_value);
708 47         104 return 1;
709             }
710 6395         13846 return 0;
711             }
712              
713              
714 5     5 1 15 sub do_initialize { 1; }
715 0     0 0   sub get_feature_by_group_id{ 1; }
716              
717             1;
718