File Coverage

Bio/SeqFeature/Generic.pm
Criterion Covered Total %
statement 238 278 85.6
branch 119 148 80.4
condition 51 77 66.2
subroutine 39 44 88.6
pod 29 34 85.2
total 476 581 81.9


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::SeqFeature::Generic
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Ewan Birney
7             #
8             # Copyright Ewan Birney
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::SeqFeature::Generic - Generic SeqFeature
17              
18             =head1 SYNOPSIS
19              
20             $feat = Bio::SeqFeature::Generic->new(
21             -start => 10,
22             -end => 100,
23             -strand => -1,
24             -primary => 'repeat', # -primary_tag is a synonym
25             -source_tag => 'repeatmasker',
26             -display_name => 'alu family',
27             -score => 1000,
28             -tag => { new => 1,
29             author => 'someone',
30             sillytag => 'this is silly!' } );
31              
32             $feat = Bio::SeqFeature::Generic->new( -gff_string => $string );
33             # if you want explicitly GFF1
34             $feat = Bio::SeqFeature::Generic->new( -gff1_string => $string );
35              
36             # add it to an annotated sequence
37              
38             $annseq->add_SeqFeature($feat);
39              
40             =head1 DESCRIPTION
41              
42             Bio::SeqFeature::Generic is a generic implementation for the
43             Bio::SeqFeatureI interface, providing a simple object to provide all
44             the information for a feature on a sequence.
45              
46             For many Features, this is all you will need to use (for example, this
47             is fine for Repeats in DNA sequence or Domains in protein
48             sequence). For other features, which have more structure, this is a
49             good base class to extend using inheritance to have new things: this
50             is what is done in the L,
51             L and L, which provide
52             well coordinated classes to represent genes on DNA sequence (for
53             example, you can get the protein sequence out from a transcript
54             class).
55              
56             For many Features, you want to add some piece of information, for
57             example a common one is that this feature is 'new' whereas other
58             features are 'old'. The tag system, which here is implemented using a
59             hash can be used here. You can use the tag system to extend the
60             L programmatically: that is, you know that you have
61             read in more information into the tag 'mytag' which you can then
62             retrieve. This means you do not need to know how to write inherited
63             Perl to provide more complex information on a feature, and/or, if you
64             do know but you do not want to write a new class every time you need
65             some extra piece of information, you can use the tag system to easily
66             store and then retrieve information.
67              
68             The tag system can be written in/out of GFF format, and also into EMBL
69             format via the L system
70              
71             =head1 Implemented Interfaces
72              
73             This class implements the following interfaces.
74              
75             =over 4
76              
77             =item L
78              
79             Note that this includes implementing Bio::RangeI.
80              
81             =item L
82              
83             =item L
84              
85             Features held by a feature are essentially sub-features.
86              
87             =back
88              
89             =head1 FEEDBACK
90              
91             =head2 Mailing Lists
92              
93             User feedback is an integral part of the evolution of this and other
94             Bioperl modules. Send your comments and suggestions preferably to one
95             of the Bioperl mailing lists. Your participation is much appreciated.
96              
97             bioperl-l@bioperl.org - General discussion
98             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
99              
100             =head2 Support
101              
102             Please direct usage questions or support issues to the mailing list:
103              
104             I
105              
106             rather than to the module maintainer directly. Many experienced and
107             reponsive experts will be able look at the problem and quickly
108             address it. Please include a thorough description of the problem
109             with code and data examples if at all possible.
110              
111             =head2 Reporting Bugs
112              
113             Report bugs to the Bioperl bug tracking system to help us keep track
114             the bugs and their resolution. Bug reports can be submitted via
115             the web:
116              
117             https://github.com/bioperl/bioperl-live/issues
118              
119             =head1 AUTHOR - Ewan Birney
120              
121             Ewan Birney Ebirney@sanger.ac.ukE
122              
123             =head1 DEVELOPERS
124              
125             This class has been written with an eye out for inheritance. The fields
126             the actual object hash are:
127              
128             _gsf_tag_hash = reference to a hash for the tags
129             _gsf_sub_array = reference to an array for subfeatures
130              
131             =head1 APPENDIX
132              
133             The rest of the documentation details each of the object
134             methods. Internal methods are usually preceded with a _
135              
136             =cut
137              
138              
139             # Let the code begin...
140              
141              
142             package Bio::SeqFeature::Generic;
143 147     147   7702 use strict;
  147         240  
  147         3753  
144              
145 147     147   26281 use Bio::Annotation::Collection;
  147         283  
  147         3355  
146 147     147   22821 use Bio::Location::Simple;
  147         310  
  147         3741  
147 147     147   43815 use Bio::Location::Split;
  147         345  
  147         4622  
148 147     147   53718 use Bio::Tools::GFF;
  147         407  
  147         5189  
149             #use Tie::IxHash;
150              
151 147     147   1049 use base qw(Bio::Root::Root Bio::SeqFeatureI Bio::FeatureHolderI Bio::AnnotatableI);
  147         265  
  147         64352  
152              
153             sub new {
154 41022     41022 1 74232 my ( $caller, @args) = @_;
155 41022         73605 my ($self) = $caller->SUPER::new(@args);
156 41022         93002 $self->_register_for_cleanup(\&cleanup_generic);
157 41022         58876 $self->{'_parse_h'} = {};
158 41022         49170 $self->{'_gsf_tag_hash'} = {};
159              
160             # bulk-set attributes
161 41022         72537 $self->set_attributes(@args);
162              
163             # done - we hope
164 41022         79659 return $self;
165             }
166              
167             =head2 set_attributes
168              
169             Title : set_attributes
170             Usage :
171             Function: Sets a whole array of parameters at once.
172             Example :
173             Returns : none
174             Args : Named parameters, in the form as they would otherwise be passed
175             to new(). Currently recognized are:
176              
177             -start start position
178             -end end position
179             -strand strand
180             -phase the phase of the feature (0..2)
181             -primary_tag primary tag
182             -primary (synonym for -primary_tag)
183             -source_tag source tag
184             -source (synonym for -source_tag)
185             -frame frame
186             -score score value
187             -tag a reference to a tag/value hash
188             -gff_string GFF v.2 string to initialize from
189             -gff1_string GFF v.1 string to initialize from
190             -seq_id the display name of the sequence
191             -annotation the AnnotationCollectionI object
192             -location the LocationI object
193              
194             =cut
195              
196             sub set_attributes {
197 45956     45956 1 72676 my ($self,@args) = @_;
198 45956         166827 my ($start, $end, $strand, $primary_tag, $source_tag, $primary,
199             $source, $frame, $score, $tag, $gff_string, $gff1_string,
200             $seqname, $seqid, $annot, $location, $display_name, $pid, $phase) =
201             $self->_rearrange([qw(START
202             END
203             STRAND
204             PRIMARY_TAG
205             SOURCE_TAG
206             PRIMARY
207             SOURCE
208             FRAME
209             SCORE
210             TAG
211             GFF_STRING
212             GFF1_STRING
213             SEQNAME
214             SEQ_ID
215             ANNOTATION
216             LOCATION
217             DISPLAY_NAME
218             PRIMARY_ID
219             PHASE
220             )], @args);
221 45956 100       133741 $location && $self->location($location);
222 45956 50       60996 $gff_string && $self->_from_gff_string($gff_string);
223 45956 50       60627 $gff1_string && do {
224 0         0 $self->gff_format(Bio::Tools::GFF->new('-gff_version' => 1));
225 0         0 $self->_from_gff_stream($gff1_string);
226             };
227            
228 45956 50       58162 $pid && $self->primary_id($pid);
229 45956 100       72979 $primary_tag && $self->primary_tag($primary_tag);
230 45956 100       61526 $source_tag && $self->source_tag($source_tag);
231 45956 100       60499 $primary && $self->primary_tag($primary);
232 45956 100       59049 $source && $self->source_tag($source);
233 45956 50       57849 $annot && $self->annotation($annot);
234 45956 100       68212 defined $start && $self->start($start);
235 45956 100       67551 defined $end && $self->end($end);
236 45956 100       65613 defined $strand && $self->strand($strand);
237 45956 100       57054 defined $frame && $self->frame($frame);
238 45956 100       56103 defined $display_name && $self->display_name($display_name);
239 45956 100       55338 defined $score && $self->score($score);
240 45956 100       58186 defined $phase && $self->phase($phase);
241              
242 45956 50       60242 if($seqname) {
243 0         0 $self->warn("-seqname is deprecated. Please use -seq_id instead.");
244 0 0       0 $seqid = $seqname unless $seqid;
245             }
246 45956 100       60220 $self->seq_id($seqid) if (defined($seqid));
247 45956 100       82433 $tag && do {
248 1980         5913 foreach my $t ( keys %$tag ) {
249 4989 100       20034 $self->add_tag_value($t, UNIVERSAL::isa($tag->{$t}, "ARRAY") ? @{$tag->{$t}} : $tag->{$t});
  198         571  
250             }
251             };
252             }
253              
254              
255             =head2 direct_new
256              
257             Title : direct_new
258             Usage : my $feat = Bio::SeqFeature::Generic->direct_new;
259             Function: create a blessed hash - for performance improvement in
260             object creation
261             Returns : Bio::SeqFeature::Generic object
262             Args : none
263              
264             =cut
265              
266             sub direct_new {
267 9798     9798 1 13643 my ( $class) = @_;
268 9798         13998 my ($self) = {};
269              
270 9798         13656 bless $self,$class;
271              
272 9798         14865 return $self;
273             }
274              
275              
276             =head2 location
277              
278             Title : location
279             Usage : my $location = $feat->location();
280             Function: returns a location object suitable for identifying location
281             of feature on sequence or parent feature
282             Returns : Bio::LocationI object
283             Args : [optional] Bio::LocationI object to set the value to.
284              
285             =cut
286              
287             sub location {
288 611900     611900 1 641158 my($self, $value ) = @_;
289              
290 611900 100       943152 if (defined($value)) {
    100          
291 30244 50 33     102913 unless (ref($value) and $value->isa('Bio::LocationI')) {
292 0         0 $self->throw("object $value pretends to be a location but ".
293             "does not implement Bio::LocationI");
294             }
295 30244         47287 $self->{'_location'} = $value;
296             }
297             elsif (! $self->{'_location'}) {
298             # guarantees a real location object is returned every time
299 18521         38869 $self->{'_location'} = Bio::Location::Simple->new();
300             }
301 611900         946316 return $self->{'_location'};
302             }
303              
304              
305             =head2 start
306              
307             Title : start
308             Usage : my $start = $feat->start;
309             $feat->start(20);
310             Function: Get/set on the start coordinate of the feature
311             Returns : integer
312             Args : none
313              
314             =cut
315              
316             sub start {
317 195881     195881 1 240430 my ($self, $value) = @_;
318             # Return soon if setting value
319 195881 100       254034 if (defined $value) {
320 37178         48807 return $self->location->start($value);
321             }
322              
323 158703 100       256583 return $self->location->start() if not defined $self->{'_gsf_seq'};
324             # Check circular sequences cut by origin
325 66332         61661 my $start;
326 66332 100 100     101514 if ( $self->{'_gsf_seq'}->is_circular
327             and $self->location->isa('Bio::Location::SplitLocationI')
328             ) {
329 8         50 my $primary_seq_length = $self->{'_gsf_seq'}->length;
330 8         17 my @sublocs = $self->location->sub_Location;
331              
332 8         12 my $cut_by_origin = 0;
333 8         14 my ($a_end, $a_strand) = (0, 0);
334 8         13 my ($b_start, $b_strand) = (0, 0);
335 8         21 for (my $i = 1; $i < scalar @sublocs; $i++) {
336 8         23 $a_end = $sublocs[$i-1]->end;
337 8         21 $a_strand = $sublocs[$i-1]->strand;
338 8         17 $b_start = $sublocs[$i]->start;
339 8         31 $b_strand = $sublocs[$i]->strand;
340             # cut by origin condition
341 8 50 33     56 if ( $a_end == $primary_seq_length
      33        
342             and $b_start == 1
343             and $a_strand == $b_strand
344             ) {
345 8         12 $cut_by_origin = 1;
346 8         13 last;
347             }
348             }
349 8 50       24 $start = ($cut_by_origin == 1) ? ($sublocs[0]->start) : ($self->location->start);
350             }
351             else {
352 66324         83549 $start = $self->location->start;
353             }
354 66332         135833 return $start;
355             }
356              
357              
358             =head2 end
359              
360             Title : end
361             Usage : my $end = $feat->end;
362             $feat->end($end);
363             Function: get/set on the end coordinate of the feature
364             Returns : integer
365             Args : none
366              
367             =cut
368              
369             sub end {
370 195372     195372 1 230394 my ($self, $value) = @_;
371             # Return soon if setting value
372 195372 100       252858 if (defined $value) {
373 37174         44921 return $self->location->end($value);
374             }
375              
376 158198 100       243956 return $self->location->end() if not defined $self->{'_gsf_seq'};
377             # Check circular sequences cut by origin
378 66073         58406 my $end;
379 66073 100 100     92756 if ( $self->{'_gsf_seq'}->is_circular
380             and $self->location->isa('Bio::Location::SplitLocationI')
381             ) {
382 8         19 my $primary_seq_length = $self->{'_gsf_seq'}->length;
383 8         14 my @sublocs = $self->location->sub_Location;
384              
385 8         13 my $cut_by_origin = 0;
386 8         11 my ($a_end, $a_strand) = (0, 0);
387 8         15 my ($b_start, $b_strand) = (0, 0);
388 8         23 for (my $i = 1; $i < scalar @sublocs; $i++) {
389 8         23 $a_end = $sublocs[$i-1]->end;
390 8         24 $a_strand = $sublocs[$i-1]->strand;
391 8         19 $b_start = $sublocs[$i]->start;
392 8         18 $b_strand = $sublocs[$i]->strand;
393             # cut by origin condition
394 8 50 33     51 if ( $a_end == $primary_seq_length
      33        
395             and $b_start == 1
396             and $a_strand == $b_strand
397             ) {
398 8         13 $cut_by_origin = 1;
399 8         14 last;
400             }
401             }
402 8 50       23 $end = ($cut_by_origin == 1) ? ($sublocs[-1]->end) : ($self->location->end);
403             }
404             else {
405 66065         80861 $end = $self->location->end;
406             }
407 66073         134991 return $end;
408             }
409              
410              
411             =head2 length
412              
413             Title : length
414             Usage : my $len = $feat->length;
415             Function: Get the feature length computed as:
416             $feat->end - $feat->start + 1
417             Returns : integer
418             Args : none
419              
420             =cut
421              
422             sub length {
423 2679     2679 1 3218 my $self = shift;
424 2679         4435 my $length = $self->end() - $self->start() + 1;
425              
426             # In circular sequences cut by origin $start > $end,
427             # e.g., join(5075..5386,1..51)), $start = 5075, $end = 51,
428             # then adjust using the primary_seq length (5386)
429 2679 100 66     5481 if ($length < 0 and defined $self->{'_gsf_seq'}) {
430 3         10 $length += $self->{'_gsf_seq'}->length;
431             }
432 2679         9156 return $length;
433             }
434              
435              
436             =head2 strand
437              
438             Title : strand
439             Usage : my $strand = $feat->strand();
440             $feat->strand($strand);
441             Function: get/set on strand information, being 1,-1 or 0
442             Returns : -1,1 or 0
443             Args : none
444              
445             =cut
446              
447             sub strand {
448 145871     145871 1 143638 my $self = shift;
449 145871         162410 return $self->location->strand(@_);
450             }
451              
452              
453             =head2 score
454              
455             Title : score
456             Usage : my $score = $feat->score();
457             $feat->score($score);
458             Function: get/set on score information
459             Returns : float
460             Args : none if get, the new value if set
461              
462             =cut
463              
464             sub score {
465 5975     5975 1 6464 my $self = shift;
466              
467 5975 100       9094 if (@_) {
468 4951         5875 my $value = shift;
469              
470 4951 50 100     37688 if ( defined $value && $value && $value !~ /^[A-Za-z]+$/ &&
      66        
      66        
      33        
471             $value !~ /^[+-]?\d+\.?\d*(e-\d+)?/ and $value != 0) {
472 0         0 $self->throw(-class=>'Bio::Root::BadParameter',
473             -text=>"'$value' is not a valid score",
474             -value=>$value);
475             }
476 4951 50       9037 if ($self->has_tag('score')) {
477 0         0 $self->warn("Removing score value(s)");
478 0         0 $self->remove_tag('score');
479             }
480 4951         8923 $self->add_tag_value('score',$value);
481             }
482 5975 100       9129 my ($score) = $self->has_tag('score') ? $self->get_tag_values('score') : undef;
483 5975         10996 return $score;
484             }
485              
486              
487             =head2 frame
488              
489             Title : frame
490             Usage : my $frame = $feat->frame();
491             $feat->frame($frame);
492             Function: get/set on frame information
493             Returns : 0,1,2, '.'
494             Args : none if get, the new value if set
495              
496             =cut
497              
498             sub frame {
499 2794     2794 1 3197 my $self = shift;
500              
501 2794 100       4272 if ( @_ ) {
502 1975         2567 my $value = shift;
503 1975 50 33     7239 if ( defined $value &&
504             $value !~ /^[0-2.]$/ ) {
505 0         0 $self->throw("'$value' is not a valid frame");
506             }
507 1975 100 66     5464 if( defined $value && $value eq '.' ) { $value = '.' }
  8         10  
508 1975         6621 return $self->{'_gsf_frame'} = $value;
509             }
510 819         3156 return $self->{'_gsf_frame'};
511             }
512              
513              
514             =head2 primary_tag
515              
516             Title : primary_tag
517             Usage : my $tag = $feat->primary_tag();
518             $feat->primary_tag('exon');
519             Function: get/set on the primary tag for a feature,
520             eg 'exon'
521             Returns : a string
522             Args : none
523              
524             =cut
525              
526             sub primary_tag {
527 133115     133115 1 129887 my $self = shift;
528 133115 100       191894 return $self->{'_primary_tag'} = shift if @_;
529 100452   100     262452 return $self->{'_primary_tag'} || '';
530             }
531              
532              
533             =head2 source_tag
534              
535             Title : source_tag
536             Usage : my $tag = $feat->source_tag();
537             $feat->source_tag('genscan');
538             Function: Returns the source tag for a feature,
539             eg, 'genscan'
540             Returns : a string
541             Args : none
542              
543             =cut
544              
545             sub source_tag {
546 69452     69452 1 68927 my $self = shift;
547 69452 100       117936 return $self->{'_source_tag'} = shift if @_;
548 36718   100     82745 return $self->{'_source_tag'} || '';
549             }
550              
551              
552             =head2 has_tag
553              
554             Title : has_tag
555             Usage : my $value = $feat->has_tag('some_tag');
556             Function: Tests whether a feature containings a tag
557             Returns : TRUE if the SeqFeature has the tag,
558             and FALSE otherwise.
559             Args : The name of a tag
560              
561             =cut
562              
563             sub has_tag {
564 63550     63550 1 74689 my ($self, $tag) = @_;
565 63550         139032 return exists $_[0]->{'_gsf_tag_hash'}->{$tag};
566             }
567              
568              
569             =head2 add_tag_value
570              
571             Title : add_tag_value
572             Usage : $feat->add_tag_value('note',"this is a note");
573             Returns : TRUE on success
574             Args : tag (string) and one or more values (any scalar(s))
575              
576             =cut
577              
578             sub add_tag_value {
579 116066     116066 1 114846 my $self = shift;
580 116066         102968 my $tag = shift;
581 116066   100     314773 $self->{'_gsf_tag_hash'}->{$tag} ||= [];
582 116066         108467 push(@{$self->{'_gsf_tag_hash'}->{$tag}},@_);
  116066         259999  
583             }
584              
585              
586             =head2 get_tag_values
587              
588             Title : get_tag_values
589             Usage : my @values = $feat->get_tag_values('note');
590             Function: Returns a list of all the values stored
591             under a particular tag.
592             Returns : A list of scalars
593             Args : The name of the tag
594              
595             =cut
596              
597             sub get_tag_values {
598 60647     60647 1 98798 my ($self, $tag) = @_;
599              
600 60647 50       82552 if( ! defined $tag ) { return (); }
  0         0  
601 60647 100       80357 if ( ! exists $self->{'_gsf_tag_hash'}->{$tag} ) {
602 1         12 $self->throw("asking for tag value that does not exist $tag");
603             }
604 60646         51595 return @{$self->{'_gsf_tag_hash'}->{$tag}};
  60646         118581  
605             }
606              
607              
608             =head2 get_all_tags
609              
610             Title : get_all_tags
611             Usage : my @tags = $feat->get_all_tags();
612             Function: Get a list of all the tags in a feature
613             Returns : An array of tag names
614             Args : none
615              
616             # added a sort so that tags will be returned in a predictable order
617             # I still think we should be able to specify a sort function
618             # to the object at some point
619             # -js
620              
621             =cut
622              
623             sub get_all_tags {
624 19046     19046 1 21394 my ($self, @args) = @_;
625 19046         17225 return sort keys %{ $self->{'_gsf_tag_hash'}};
  19046         164874  
626             }
627              
628              
629             =head2 remove_tag
630              
631             Title : remove_tag
632             Usage : $feat->remove_tag('some_tag');
633             Function: removes a tag from this feature
634             Returns : the array of values for this tag before removing it
635             Args : tag (string)
636              
637             =cut
638              
639             sub remove_tag {
640 19     19 1 38 my ($self, $tag) = @_;
641              
642 19 50       59 if ( ! exists $self->{'_gsf_tag_hash'}->{$tag} ) {
643 0         0 $self->throw("trying to remove a tag that does not exist: $tag");
644             }
645 19         23 my @vals = @{$self->{'_gsf_tag_hash'}->{$tag}};
  19         48  
646 19         45 delete $self->{'_gsf_tag_hash'}->{$tag};
647 19         39 return @vals;
648             }
649              
650              
651             =head2 attach_seq
652              
653             Title : attach_seq
654             Usage : $feat->attach_seq($seq);
655             Function: Attaches a Bio::Seq object to this feature. This
656             Bio::Seq object is for the *entire* sequence: ie
657             from 1 to 10000
658             Example :
659             Returns : TRUE on success
660             Args : a Bio::PrimarySeqI compliant object
661              
662             =cut
663              
664             sub attach_seq {
665 17238     17238 1 17929 my ($self, $seq) = @_;
666              
667 17238 50 33     57771 if ( ! ($seq && ref($seq) && $seq->isa("Bio::PrimarySeqI")) ) {
      33        
668 0         0 $self->throw("Must attach Bio::PrimarySeqI objects to SeqFeatures but got '".ref($seq)."'");
669             }
670              
671 17238         22763 $self->{'_gsf_seq'} = $seq;
672              
673             # attach to sub features if they want it
674 17238         18860 foreach ( $self->sub_SeqFeature() ) {
675 4675         5570 $_->attach_seq($seq);
676             }
677 17238         19293 return 1;
678             }
679              
680              
681             =head2 seq
682              
683             Title : seq
684             Usage : my $tseq = $feat->seq();
685             Function: returns the truncated sequence (if there) for this
686             Example :
687             Returns : sub seq (a Bio::PrimarySeqI compliant object) on attached sequence
688             bounded by start & end, or undef if there is no sequence attached.
689             If the strand is defined and set to -1, the returned sequence is
690             the reverse-complement of the region
691             Args : none
692              
693             =cut
694              
695             sub seq {
696 243     243 1 385 my ($self, $arg) = @_;
697              
698 243 50       459 if ( defined $arg ) {
699 0         0 $self->throw("Calling SeqFeature::Generic->seq with an argument. You probably want attach_seq");
700             }
701              
702 243 100       503 if ( ! exists $self->{'_gsf_seq'} ) {
703 1         3 return;
704             }
705              
706             # assumming our seq object is sensible, it should not have to yank
707             # the entire sequence out here.
708 242         275 my $seq;
709 242         485 my $start = $self->start;
710 242         462 my $end = $self->end;
711             # Check circular sequences cut by origin (e.g. join(2006035..2007700,1..257))
712 242 100 100     512 if ( $self->{'_gsf_seq'}->is_circular
      66        
713             and $self->location->isa('Bio::Location::SplitLocationI')
714             and $start > $end
715             ) {
716 1         4 my $primary_seq_length = $self->{'_gsf_seq'}->length;
717              
718             # Get duplicate object with the first sequence piece using trunc()
719 1         16 $seq = $self->{'_gsf_seq'}->trunc($start, $primary_seq_length);
720              
721             # Get post-origin sequence and build the complete sequence
722 1         4 my $post_origin = $self->{'_gsf_seq'}->subseq(1, $end);
723 1         3 my $complete_seq = $seq->seq() . $post_origin;
724              
725             # Add complete sequence to object
726 1         4 $seq->seq($complete_seq);
727             }
728             else {
729 241         726 $seq = $self->{'_gsf_seq'}->trunc($start, $end);
730             }
731              
732 242 100 100     561 if ( defined $self->strand && $self->strand == -1 ) {
733 143         365 $seq = $seq->revcom;
734             }
735              
736 242         851 return $seq;
737             }
738              
739              
740             =head2 entire_seq
741              
742             Title : entire_seq
743             Usage : my $whole_seq = $feat->entire_seq();
744             Function: gives the entire sequence that this seqfeature is attached to
745             Example :
746             Returns : a Bio::PrimarySeqI compliant object, or undef if there is no
747             sequence attached
748             Args :
749              
750             =cut
751              
752             sub entire_seq {
753 756     756 1 2868 return shift->{'_gsf_seq'};
754             }
755              
756              
757             =head2 seq_id
758              
759             Title : seq_id
760             Usage : $feat->seq_id($newval)
761             Function: There are many cases when you make a feature that you
762             do know the sequence name, but do not know its actual
763             sequence. This is an attribute such that you can store
764             the ID (e.g., display_id) of the sequence.
765              
766             This attribute should *not* be used in GFF dumping, as
767             that should come from the collection in which the seq
768             feature was found.
769             Returns : value of seq_id
770             Args : newvalue (optional)
771              
772             =cut
773              
774             sub seq_id {
775 79337     79337 1 85261 my $obj = shift;
776 79337 100       124037 return $obj->{'_gsf_seq_id'} = shift if @_;
777 42974         80139 return $obj->{'_gsf_seq_id'};
778             }
779              
780              
781             =head2 display_name
782              
783             Title : display_name
784             Usage : my $featname = $feat->display_name;
785             Function: Implements the display_name() method, which is a human-readable
786             name for the feature.
787             Returns : value of display_name (a string)
788             Args : Optionally, on set the new value or undef
789              
790             =cut
791              
792             sub display_name {
793 322     322 1 372 my $self = shift;
794 322 100       723 return $self->{'display_name'} = shift if @_;
795 182   100     567 return $self->{'display_name'} || '';
796             }
797              
798              
799             =head1 Methods for implementing Bio::AnnotatableI
800              
801             =head2 annotation
802              
803             Title : annotation
804             Usage : $feat->annotation($annot_obj);
805             Function: Get/set the annotation collection object for annotating this
806             feature.
807              
808             Example :
809             Returns : A Bio::AnnotationCollectionI object
810             Args : newvalue (optional)
811              
812             =cut
813              
814             sub annotation {
815 2950     2950 1 4144 my ($obj,$value) = @_;
816              
817             # we are smart if someone references the object and there hasn't been
818             # one set yet
819 2950 100 100     8672 if(defined $value || ! defined $obj->{'annotation'} ) {
820 1807 100       6662 $value = Bio::Annotation::Collection->new() unless ( defined $value );
821 1807         2549 $obj->{'annotation'} = $value;
822             }
823 2950         6807 return $obj->{'annotation'};
824             }
825              
826              
827             =head1 Methods to implement Bio::FeatureHolderI
828              
829             This includes methods for retrieving, adding, and removing
830             features. Since this is already a feature, features held by this
831             feature holder are essentially sub-features.
832              
833             =head2 get_SeqFeatures
834              
835             Title : get_SeqFeatures
836             Usage : my @feats = $feat->get_SeqFeatures();
837             Function: Returns an array of sub Sequence Features
838             Returns : An array
839             Args : none
840              
841             =cut
842              
843             sub get_SeqFeatures {
844 30455 100   30455 1 24874 return @{ shift->{'_gsf_sub_array'} || []};
  30455         76740  
845             }
846              
847              
848             =head2 add_SeqFeature
849              
850             Title : add_SeqFeature
851             Usage : $feat->add_SeqFeature($subfeat);
852             $feat->add_SeqFeature($subfeat,'EXPAND');
853             Function: Adds a SeqFeature into the subSeqFeature array.
854             With no 'EXPAND' qualifer, subfeat will be tested
855             as to whether it lies inside the parent, and throw
856             an exception if not.
857              
858             If EXPAND is used, the parent's start/end/strand will
859             be adjusted so that it grows to accommodate the new
860             subFeature
861              
862             !IMPORTANT! The coordinates of the subfeature should not be relative
863             to the parent feature it is attached to, but relative to the sequence
864             the parent feature is located on.
865              
866             Returns : nothing
867             Args : An object which has the SeqFeatureI interface
868              
869             =cut
870              
871             sub add_SeqFeature {
872 23287     23287 1 31754 my ($self,$feat,$expand) = @_;
873 23287 50       32828 unless( defined $feat ) {
874 0         0 $self->warn("Called add_SeqFeature with no feature, ignoring");
875 0         0 return;
876             }
877 23287 50       46423 if ( !$feat->isa('Bio::SeqFeatureI') ) {
878 0         0 $self->warn("$feat does not implement Bio::SeqFeatureI. Will add it anyway, but beware...");
879             }
880              
881 23287 100 66     51204 if($expand && ($expand eq 'EXPAND')) {
882 18952         25045 $self->_expand_region($feat);
883             } else {
884 4335 50       7153 if ( !$self->contains($feat) ) {
885 0         0 $self->throw("$feat is not contained within parent feature, and expansion is not valid");
886             }
887             }
888              
889 23287 100       41109 $self->{'_gsf_sub_array'} = [] unless exists($self->{'_gsf_sub_array'});
890 23287         22180 push(@{$self->{'_gsf_sub_array'}},$feat);
  23287         61472  
891              
892             }
893              
894              
895             =head2 remove_SeqFeatures
896              
897             Title : remove_SeqFeatures
898             Usage : $feat->remove_SeqFeatures;
899             Function: Removes all SeqFeatures
900              
901             If you want to remove only a subset of features then remove that
902             subset from the returned array, and add back the rest.
903             Example :
904             Returns : The array of Bio::SeqFeatureI implementing features that was
905             deleted.
906             Args : none
907              
908             =cut
909              
910             sub remove_SeqFeatures {
911 48     48 1 59 my ($self) = @_;
912 48 100       52 my @subfeats = @{$self->{'_gsf_sub_array'} || []};
  48         120  
913 48         85 $self->{'_gsf_sub_array'} = []; # zap the array implicitly.
914 48         71 return @subfeats;
915             }
916              
917              
918             =head1 GFF-related methods
919              
920             =head2 gff_format
921              
922             Title : gff_format
923             Usage : # get:
924             my $gffio = $feat->gff_format();
925             # set (change the default version of GFF2):
926             $feat->gff_format(Bio::Tools::GFF->new(-gff_version => 1));
927             Function: Get/set the GFF format interpreter. This object is supposed to
928             format and parse GFF. See Bio::Tools::GFF for the interface.
929              
930             If this method is called as class method, the default for all
931             newly created instances will be changed. Otherwise only this
932             instance will be affected.
933             Example :
934             Returns : a Bio::Tools::GFF compliant object
935             Args : On set, an instance of Bio::Tools::GFF or a derived object.
936              
937             =cut
938              
939             sub gff_format {
940 1     1 1 3 my ($self, $gffio) = @_;
941 1 50       3 if(defined($gffio)) {
942 0 0       0 if(ref($self)) {
943 0         0 $self->{'_gffio'} = $gffio;
944             } else {
945 0         0 $Bio::SeqFeatureI::static_gff_formatter = $gffio;
946             }
947             }
948             return (ref($self) && exists($self->{'_gffio'}) ?
949 1 50 33     12 $self->{'_gffio'} : $self->_static_gff_formatter);
950             }
951              
952              
953             =head2 gff_string
954              
955             Title : gff_string
956             Usage : my $str = $feat->gff_string;
957             my $str = $feat->gff_string($gff_formatter);
958             Function: Provides the feature information in GFF format.
959              
960             We override this here from Bio::SeqFeatureI in order to use the
961             formatter returned by gff_format().
962              
963             Returns : A string
964             Args : Optionally, an object implementing gff_string().
965              
966             =cut
967              
968             sub gff_string {
969 8     8 1 61 my ($self,$formatter) = @_;
970 8 100       17 $formatter = $self->gff_format() unless $formatter;
971 8         19 return $formatter->gff_string($self);
972             }
973              
974              
975             =head2 slurp_gff_file
976              
977             Title : slurp_file
978             Usage : my @features = Bio::SeqFeature::Generic::slurp_gff_file(\*FILE);
979             Function: Sneaky function to load an entire file as in memory objects.
980             Beware of big files.
981              
982             This method is deprecated. Use Bio::Tools::GFF instead, which can
983             also handle large files.
984              
985             Example :
986             Returns :
987             Args :
988              
989             =cut
990              
991             sub slurp_gff_file {
992 0     0 1 0 my ($f) = @_;
993 0         0 my @out;
994 0 0       0 if ( !defined $f ) {
995 0         0 Bio::Root::Root->throw("Must have a filehandle");
996             }
997              
998 0         0 Bio::Root::Root->deprecated( -message => "deprecated method slurp_gff_file() called in Bio::SeqFeature::Generic. Use Bio::Tools::GFF instead.",
999             -warn_version => '1.005',
1000             -throw_version => '1.007',
1001             );
1002              
1003 0         0 while(<$f>) {
1004 0         0 my $sf = Bio::SeqFeature::Generic->new('-gff_string' => $_);
1005 0         0 push(@out, $sf);
1006             }
1007              
1008 0         0 return @out;
1009             }
1010              
1011              
1012             =head2 _from_gff_string
1013              
1014             Title : _from_gff_string
1015             Usage :
1016             Function: Set feature properties from GFF string.
1017              
1018             This method uses the object returned by gff_format() for the
1019             actual interpretation of the string. Set a different GFF format
1020             interpreter first if you need a specific version, like GFF1. (The
1021             default is GFF2.)
1022             Example :
1023             Returns :
1024             Args : a GFF-formatted string
1025              
1026             =cut
1027              
1028             sub _from_gff_string {
1029 0     0   0 my ($self, $string) = @_;
1030 0         0 $self->gff_format()->from_gff_string($self, $string);
1031             }
1032              
1033              
1034             =head2 _expand_region
1035              
1036             Title : _expand_region
1037             Usage : $feat->_expand_region($feature);
1038             Function: Expand the total region covered by this feature to
1039             accommodate for the given feature.
1040              
1041             May be called whenever any kind of subfeature is added to this
1042             feature. add_SeqFeature() already does this.
1043             Returns :
1044             Args : A Bio::SeqFeatureI implementing object.
1045              
1046             =cut
1047              
1048             sub _expand_region {
1049 19497     19497   22251 my ($self, $feat) = @_;
1050 19497 50       34996 if(! $feat->isa('Bio::SeqFeatureI')) {
1051 0         0 $self->warn("$feat does not implement Bio::SeqFeatureI");
1052             }
1053             # if this doesn't have start set - forget it!
1054             # changed to reflect sanity checks for LocationI
1055 19497 100       21441 if(!$self->location->valid_Location) {
1056 2414         3330 $self->start($feat->start);
1057 2414         4070 $self->end($feat->end);
1058 2414 100       3400 $self->strand($feat->strand) unless $self->strand;
1059             } else {
1060 17083         29082 my ($start,$end,$strand) = $self->union($feat);
1061 17083         29349 $self->start($start);
1062 17083         26202 $self->end($end);
1063 17083         21237 $self->strand($strand);
1064             }
1065             }
1066              
1067              
1068             =head2 _parse
1069              
1070             Title : _parse
1071             Usage :
1072             Function: Parsing hints
1073             Example :
1074             Returns :
1075             Args :
1076              
1077             =cut
1078              
1079             sub _parse {
1080 0     0   0 my ($self) = @_;
1081 0         0 return $self->{'_parse_h'};
1082             }
1083              
1084              
1085             =head2 _tag_value
1086              
1087             Title : _tag_value
1088             Usage :
1089             Function: For internal use only. Convenience method for those tags that
1090             may only have a single value.
1091             Returns : The first value under the given tag as a scalar (string)
1092             Args : The tag as a string. Optionally, the value on set.
1093              
1094             =cut
1095              
1096             sub _tag_value {
1097 11184     11184   12066 my $self = shift;
1098 11184         11186 my $tag = shift;
1099              
1100 11184 100 100     18050 if(@_ || (! $self->has_tag($tag))) {
1101 10907 100       15934 $self->remove_tag($tag) if($self->has_tag($tag));
1102 10907         17803 $self->add_tag_value($tag, @_);
1103             }
1104 11184         18088 return ($self->get_tag_values($tag))[0];
1105             }
1106              
1107              
1108             #######################################################################
1109             # aliases for methods that changed their names in an attempt to make #
1110             # bioperl names more consistent #
1111             #######################################################################
1112              
1113             sub seqname {
1114 0     0 0 0 my $self = shift;
1115 0         0 $self->warn("SeqFeatureI::seqname() is deprecated. Please use seq_id() instead.");
1116 0         0 return $self->seq_id(@_);
1117             }
1118              
1119             sub display_id {
1120 0     0 0 0 my $self = shift;
1121 0         0 $self->warn("SeqFeatureI::display_id() is deprecated. Please use display_name() instead.");
1122 0         0 return $self->display_name(@_);
1123             }
1124              
1125             # this is towards consistent naming
1126 280     280 0 434 sub each_tag_value { return shift->get_tag_values(@_); }
1127 91     91 0 220 sub all_tags { return shift->get_all_tags(@_); }
1128              
1129             # we revamped the feature containing property to implementing
1130             # Bio::FeatureHolderI
1131             *sub_SeqFeature = \&get_SeqFeatures;
1132             *add_sub_SeqFeature = \&add_SeqFeature;
1133             *flush_sub_SeqFeatures = \&remove_SeqFeatures;
1134             # this one is because of inconsistent naming ...
1135             *flush_sub_SeqFeature = \&remove_SeqFeatures;
1136              
1137             sub cleanup_generic {
1138 41069     41069 0 43985 my $self = shift;
1139 41069 100       39585 foreach my $f ( @{$self->{'_gsf_sub_array'} || []} ) {
  41069         104925  
1140 1228         2087 $f = undef;
1141             }
1142 41069         59346 $self->{'_gsf_seq'} = undef;
1143 41069         38265 foreach my $t ( keys %{$self->{'_gsf_tag_hash'} } ) {
  41069         116913  
1144 55372         79358 $self->{'_gsf_tag_hash'}->{$t} = undef;
1145 55372         119544 delete($self->{'_gsf_tag_hash'}->{$t}); # bug 1720 fix
1146             }
1147             }
1148              
1149             1;