File Coverage

Bio/DB/GFF/RelSegment.pm
Criterion Covered Total %
statement 191 296 64.5
branch 89 196 45.4
condition 36 78 46.1
subroutine 27 40 67.5
pod 29 30 96.6
total 372 640 58.1


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Bio::DB::GFF::RelSegment -- Sequence segment with relative coordinate support
4              
5             =head1 SYNOPSIS
6              
7             See L.
8              
9             =head1 DESCRIPTION
10              
11             Bio::DB::GFF::RelSegment is a stretch of sequence that can handle
12             relative coordinate addressing. It inherits from
13             Bio::DB::GFF::Segment, and is the base class for
14             Bio::DB::GFF::Feature.
15              
16             In addition to the source sequence, a relative segment has a
17             "reference sequence", which is used as the basis for its coordinate
18             system. The reference sequence can be changed at will, allowing you
19             freedom to change the "frame of reference" for features contained
20             within the segment. For example, by setting a segment's reference
21             sequence to the beginning of a gene, you can view all other features
22             in gene-relative coordinates.
23              
24             The reference sequence and the source sequence must be on the same
25             physical stretch of DNA, naturally. However, they do not have to be
26             on the same strand. The strandedness of the reference sequence
27             determines whether coordinates increase to the right or the left.
28              
29             Generally, you will not create or manipulate Bio::DB::GFF::RelSeg0ment
30             objects directly, but use those that are returned by the Bio::DB::GFF
31             module.
32              
33             =head2 An Example
34              
35             To understand how relative coordinates work, consider the following
36             example from the C. elegans database. First we create the appropriate
37             GFF accessor object (the factory):
38              
39             my $db = Bio::DB::GFF->new(-dsn => 'dbi:mysql:elegans',
40             -adaptor=>'dbi:mysqlopt');
41              
42             Now we fetch out a segment based on cosmid clone ZK909:
43              
44             my $seg = $db->segment('ZK909');
45              
46             If we call the segment's refseq() method, we see that the base of the
47             coordinate system is the sequence "ZK154", and that its start and
48             stop positions are 1 and the length of the cosmid:
49              
50             print $seg->refseq;
51             => ZK909
52              
53             print $seg->start,' - ',$seg->stop;
54             => 1 - 33782
55              
56             As a convenience, the "" operator is overloaded in this class, to give
57             the reference sequence, and start and stop positions:
58              
59             print $seg;
60             => ZK909:1,33782
61              
62             Internally, Bio::DB::GFF::RelSegment has looked up the absolute
63             coordinates of this segment and maintains the source sequence and the
64             absolute coordinates relative to the source sequence. We can see this
65             information using sourceseq() (inherited from Bio::DB::GFF::Segment)
66             and the abs_start() and abs_end() methods:
67              
68             print $seg->sourceseq;
69             => CHROMOSOME_I
70              
71             print $seg->abs_start,' - ',$seg->abs_end;
72             => 14839545 - 14873326
73              
74             We can also put the segment into absolute mode, so that it behaves
75             like Bio::DB::Segment, and always represents coordinates on the source
76             sequence. This is done by passing a true value to the absolute()
77             method:
78              
79             $seq->absolute(1);
80             print $seg;
81             => CHROMOSOME_I:14839545,14873326
82              
83             We can change the reference sequence at any time. One way is to call
84             the segment's ref() method, giving it the ID (and optionally the
85             class) of another landmark on the genome. For example, if we know
86             that cosmid ZK337 is adjacent to ZK909, then we can view ZK909 in
87             ZK337-relative coordinates:
88              
89             $seg->refseq('ZK337');
90             print $seg;
91             => ZK337:-33670,111
92              
93             We can call the segment's features() method in order to get the list
94             of contigs that overlap this segment (in the C. elegans database,
95             contigs have feature type "Sequence:Link"):
96              
97             @links = $seg->features('Sequence:Link');
98              
99             We can now set the reference sequence to the first of these contigs like so:
100              
101             $seg->refseq($links[0]);
102             print $seg;
103             => Sequence:Link(LINK_Y95D11A):3997326,4031107
104              
105             =cut
106              
107             package Bio::DB::GFF::RelSegment;
108              
109 3     3   6 use strict;
  3         6  
  3         249  
110              
111 3     3   1020 use Bio::DB::GFF::Feature;
  3         3  
  3         72  
112 3     3   15 use Bio::DB::GFF::Util::Rearrange;
  3         3  
  3         117  
113 3     3   9 use Bio::RangeI;
  3         6  
  3         48  
114              
115 3     3   9 use base qw(Bio::DB::GFF::Segment);
  3         3  
  3         294  
116              
117             use overload '""' => 'asString',
118 1749     1749   6406 'bool' => sub { overload::StrVal(shift) },
119 3     3   12 fallback=>1;
  3         3  
  3         21  
120              
121             =head1 API
122              
123             The remainder of this document describes the API for
124             Bio::DB::GFF::Segment.
125              
126             =cut
127              
128             =head2 new
129              
130             Title : new
131             Usage : $s = Bio::DB::GFF::RelSegment->new(@args)
132             Function: create a new relative segment
133             Returns : a new Bio::DB::GFF::RelSegment object
134             Args : see below
135             Status : Public
136              
137             This method creates a new Bio::DB::GFF::RelSegment object. Generally
138             this is called automatically by the Bio::DB::GFF module and
139             derivatives.
140              
141             This function uses a named-argument style:
142              
143             -factory a Bio::DB::GFF::Adaptor to use for database access
144             -seq ID of the source sequence
145             -class class of the source sequence
146             -start start of the desired segment relative to source sequence
147             -stop stop of the desired segment relative to source sequence
148             -ref ID of the reference sequence
149             -refclass class of the reference sequence
150             -offset 0-based offset from source sequence to start of segment
151             -length length of desired segment
152             -absolute, -force_absolute
153             use absolute coordinates, rather than coordinates relative
154             to the start of self or the reference sequence
155              
156             The -seq argument accepts the ID of any landmark in the database. The
157             stored source sequence becomes whatever the GFF file indicates is the
158             proper sequence for this landmark. A class of "Sequence" is assumed
159             unless otherwise specified in the -class argument.
160              
161             If the argument to -seq is a Bio::GFF::Featname object (such as
162             returned by the group() method), then the class is taken from that.
163              
164             The optional -start and -stop arguments specify the end points for the
165             retrieved segment. For those who do not like 1-based indexing,
166             -offset and -length are provided. If both -start/-stop and
167             -offset/-length are provided, the latter overrides the former.
168             Generally it is not a good idea to mix metaphors.
169              
170             -ref and -refclass together indicate a sequence to be used for
171             relative coordinates. If not provided, the source sequence indicated
172             by -seq is used as the reference sequence. If the argument to -ref is
173             a Bio::GFF::Featname object (such as returned by the group() method),
174             then the class is taken from that.
175              
176             -force_absolute should be used if you wish to skip the lookup of the
177             absolute position of the source sequence that ordinarily occurs when
178             you create a relative segment. In this case, the source sequence must
179             be a sequence that has been specified as the "source" in the GFF file.
180              
181             =cut
182              
183             # Create a new Bio::DB::GFF::RelSegment Object
184             # arguments are:
185             # -factory => factory and DBI interface
186             # -seq => $sequence_name
187             # -start => $start_relative_to_sequence
188             # -stop => $stop_relative_to_sequence
189             # -ref => $sequence which establishes coordinate system
190             # -offset => 0-based offset relative to sequence
191             # -length => length of segment
192             # -nocheck => turn off checking, force segment to be constructed
193             # -absolute => use absolute coordinate addressing
194              
195             sub new {
196 203     203 1 177 my $package = shift;
197 203         1207 my ($factory,$name,$start,$stop,$refseq,$class,$refclass,$offset,$length,$force_absolute,$nocheck) =
198             rearrange([
199             'FACTORY',
200             [qw(NAME SEQ SEQUENCE SOURCESEQ)],
201             [qw(START BEGIN)],
202             [qw(STOP END)],
203             [qw(REFSEQ REF REFNAME)],
204             [qw(CLASS SEQCLASS)],
205             qw(REFCLASS),
206             [qw(OFFSET OFF)],
207             [qw(LENGTH LEN)],
208             [qw(ABSOLUTE)],
209             [qw(NOCHECK FORCE)],
210             ],@_);
211              
212 203 50       682 $package = ref $package if ref $package;
213 203 50       324 $factory or $package->throw("new(): provide a -factory argument");
214              
215             # to allow people to use segments as sources
216 203 100 66     481 if (ref($name) && $name->isa('Bio::DB::GFF::Segment')) {
217 35 100       106 $start = 1 unless defined $start;
218 35 100       66 $stop = $name->length unless defined $stop;
219 35         91 return $name->subseq($start,$stop);
220             }
221              
222 168         119 my @object_results;
223              
224             # support for Featname objects
225 168 50 33     271 if (ref($name) && $name->can('class')) {
226 0         0 $class = $name->class;
227 0         0 $name = $name->name;
228             }
229              
230             # if the class of the landmark is not specified then default to 'Sequence'
231 168   50     299 $class ||= eval{$factory->default_class} || 'Sequence';
      66        
232              
233             # confirm that indicated sequence is actually in the database!
234 168         121 my @abscoords;
235              
236             # abscoords() will now return an array ref, each element of which is
237             # ($absref,$absclass,$absstart,$absstop,$absstrand)
238              
239 168 50       230 if ($nocheck) {
240 0         0 $force_absolute++;
241 0         0 $start = 1;
242             }
243              
244             # if ($force_absolute && defined($start)) { # absolute position is given to us
245             # @abscoords = ([$name,$class,$start,$stop,'+']);
246             # } else {
247 168 50       467 my $result = $factory->abscoords($name,$class,$force_absolute ? $name : ()) or return;
    100          
248 160         199 @abscoords = @$result;
249             # }
250              
251 160         205 foreach (@abscoords) {
252 160         349 my ($absref,$absclass,$absstart,$absstop,$absstrand,$sname) = @$_;
253 160 50       298 $sname = $name unless defined $sname;
254 160         158 my ($this_start,$this_stop,$this_length) = ($start,$stop,$length);
255              
256             # partially fill in object
257 160         297 my $self = bless { factory => $factory },$package;
258              
259 160   100     341 $absstrand ||= '+';
260              
261 160 50       312 if ($absstart > $absstop) { # AAARGH! DATA FORMAT ERROR! FIX.
262 0         0 ($absstart,$absstop) = ($absstop,$absstart);
263 0 0       0 $absstrand = $absstrand eq '+' ? '-' : '+';
264             }
265              
266             # an explicit length overrides start and stop
267 160 50       199 if (defined $offset) {
268 0 0       0 warn "new(): bad idea to call new() with both a start and an offset"
269             if defined $this_start;
270 0         0 $this_start = $offset+1;
271             }
272 160 50       243 if (defined $this_length) {
273 0 0       0 warn "new(): bad idea to call new() with both a stop and a length"
274             if defined $this_stop;
275 0         0 $this_stop = $this_start + $length - 1;
276             }
277              
278             # this allows a SQL optimization way down deep
279 160 100 100     867 $self->{whole}++ if $absref eq $sname and !defined($this_start) and !defined($this_stop);
      66        
280              
281 160 100       238 $this_start = 1 if !defined $this_start;
282 160 100       254 $this_stop = $absstop-$absstart+1 if !defined $this_stop;
283 160         159 $this_length = $this_stop - $this_start + 1;
284              
285             # now offset to correct subsegment based on desired start and stop
286 160 50       292 if ($force_absolute) {
    100          
287             # ($this_start,$this_stop) = ($absstart,$absstop);
288 0         0 $self->absolute(1);
289             } elsif ($absstrand eq '+') {
290 140         112 $this_start = $absstart + $this_start - 1;
291 140         130 $this_stop = $this_start + $this_length - 1;
292             } else {
293 20         30 $this_start = $absstop - ($this_start - 1);
294 20         25 $this_stop = $absstop - ($this_stop - 1);
295             }
296              
297             # handle truncation in either direction
298             # This only happens if the segment runs off the end of
299             # the reference sequence
300 160 100 66     337 if ($factory->strict_bounds_checking &&
      66        
301             (($this_start < $absstart) || ($this_stop > $absstop))) {
302             # return empty if we are completely off the end of the ref se
303 5 50 33     29 next unless $this_start<=$absstop && $this_stop>=$absstart;
304 5 50       10 if (my $a = $factory->abscoords($absref,'Sequence')) {
305 5         8 my $refstart = $a->[0][2];
306 5         7 my $refstop = $a->[0][3];
307 5 50       15 if ($this_start < $refstart) {
308 0         0 $this_start = $refstart;
309 0         0 $self->{truncated}{start}++;
310             }
311 5 50       13 if ($this_stop > $refstop) {
312 5         8 $this_stop = $absstop;
313 5         19 $self->{truncated}{stop}++;
314             }
315             }
316             }
317              
318 160         178 @{$self}{qw(sourceseq start stop strand class)}
  160         498  
319             = ($absref,$this_start,$this_stop,$absstrand,$absclass);
320              
321             # handle reference sequence
322 160 50       252 if (defined $refseq) {
323 0 0       0 $refclass = $refseq->class if $refseq->can('class');
324 0   0     0 $refclass ||= 'Sequence';
325 0         0 my ($refref,$refstart,$refstop,$refstrand) = $factory->abscoords($refseq,$refclass);
326 0 0       0 unless ($refref eq $absref) {
327 0         0 $self->error("reference sequence is on $refref but source sequence is on $absref");
328 0         0 return;
329             }
330 0 0       0 $refstart = $refstop if $refstrand eq '-';
331 0         0 @{$self}{qw(ref refstart refstrand)} = ($refseq,$refstart,$refstrand);
  0         0  
332             } else {
333 160 100       211 $absstart = $absstop if $absstrand eq '-';
334 160         166 @{$self}{qw(ref refstart refstrand)} = ($sname,$absstart,$absstrand);
  160         415  
335             }
336 160         302 push @object_results,$self;
337             }
338              
339 160 50       559 return wantarray ? @object_results : $object_results[0];
340             }
341              
342             # overridden methods
343             # start, stop, length
344             sub start {
345 1591     1591 1 5329 my $self = shift;
346 1591 50       1674 return $self->strand < 0 ? $self->{stop} : $self->{start} if $self->absolute;
    100          
347 1586         1979 $self->_abs2rel($self->{start});
348             }
349             sub end {
350 697     697 1 562 my $self = shift;
351 697 0       717 return $self->strand < 0 ? $self->{start} : $self->{stop} if $self->absolute;
    50          
352 697         859 $self->_abs2rel($self->{stop});
353             }
354             *stop = \&end;
355              
356             sub length {
357 35     35 1 3605 my $self = shift;
358 35 50       96 return unless defined $self->abs_end;
359 35         50 abs($self->abs_end - $self->abs_start) + 1;
360             }
361              
362             sub abs_start {
363 120     120 1 107 my $self = shift;
364 120 50       151 if ($self->absolute) {
365 0         0 my ($a,$b) = ($self->SUPER::abs_start,$self->SUPER::abs_end);
366 0 0       0 return ($a<$b) ? $a : $b;
367             }
368             else {
369 120         216 return $self->SUPER::abs_start(@_);
370             }
371             }
372             sub abs_end {
373 140     140 1 102 my $self = shift;
374 140 50       195 if ($self->absolute) {
375 0         0 my ($a,$b) = ($self->SUPER::abs_start,$self->SUPER::abs_end);
376 0 0       0 return ($a>$b) ? $a : $b;
377             }
378              
379             else {
380 140         262 return $self->SUPER::abs_end(@_);
381             }
382             }
383              
384             *abs_stop = \&abs_end;
385              
386             =head2 refseq
387              
388             Title : refseq
389             Usage : $ref = $s->refseq([$newseq] [,$newseqclass])
390             Function: get/set reference sequence
391             Returns : current reference sequence
392             Args : new reference sequence and class (optional)
393             Status : Public
394              
395             This method will get or set the reference sequence. Called with no
396             arguments, it returns the current reference sequence. Called with
397             either a sequence ID and class, a Bio::DB::GFF::Segment object (or
398             subclass) or a Bio::DB::GFF::Featname object, it will set the current
399             reference sequence and return the previous one.
400              
401             The method will generate an exception if you attempt to set the
402             reference sequence to a sequence that isn't contained in the database,
403             or one that has a different source sequence from the segment.
404              
405             =cut
406              
407             #'
408             sub refseq {
409 372     372 1 284 my $self = shift;
410 372         394 my $g = $self->{ref};
411 372 100       490 if (@_) {
412 50         85 my ($newref,$newclass);
413 50 100       78 if (@_ == 2) {
414 10         15 $newclass = shift;
415 10         11 $newref = shift;
416             } else {
417 40         36 $newref = shift;
418 40         42 $newclass = 'Sequence';
419             }
420              
421 50 50       81 defined $newref or $self->throw('refseq() called with an undef reference sequence');
422              
423             # support for Featname objects
424 50 100 66     287 $newclass = $newref->class if ref($newref) && $newref->can('class');
425              
426             # $self->throw("Cannot define a segment's reference sequence in terms of itself!")
427             # if ref($newref) and overload::StrVal($newref) eq overload::StrVal($self);
428              
429 50         51 my ($refsource,undef,$refstart,$refstop,$refstrand);
430 50 100       183 if ($newref->isa('Bio::DB::GFF::RelSegment')) {
431 30 100       51 ($refsource,undef,$refstart,$refstop,$refstrand) =
432             ($newref->sourceseq,undef,$newref->abs_start,$newref->abs_end,$newref->abs_strand >= 0 ? '+' : '-');
433             } else {
434 20         43 my $coords = $self->factory->abscoords($newref,$newclass);
435 20         32 foreach (@$coords) { # find the appropriate one
436 20         43 ($refsource,undef,$refstart,$refstop,$refstrand) = @$_;
437 20 100       68 last if $refsource eq $self->{sourceseq};
438             }
439            
440             }
441             $self->throw("can't set reference sequence: $newref and $self are on different sequence segments")
442 50 100       137 unless $refsource eq $self->{sourceseq};
443              
444 45         50 @{$self}{qw(ref refstart refstrand)} = ($newref,$refstart,$refstrand);
  45         103  
445 45         74 $self->absolute(0);
446             }
447 367 100       479 return $self->absolute ? $self->sourceseq : $g;
448             }
449              
450              
451             =head2 abs_low
452              
453             Title : abs_low
454             Usage : $s->abs_low
455             Function: the absolute lowest coordinate of the segment
456             Returns : an integer
457             Args : none
458             Status : Public
459              
460             This is for GadFly compatibility, and returns the low coordinate in
461             absolute coordinates;
462              
463             =cut
464              
465             sub abs_low {
466 0     0 1 0 my $self = shift;
467 0         0 my ($a,$b) = ($self->abs_start,$self->abs_end);
468 0 0       0 return ($a<$b) ? $a : $b;
469             }
470              
471             =head2 abs_high
472              
473             Title : abs_high
474             Usage : $s->abs_high
475             Function: the absolute highest coordinate of the segment
476             Returns : an integer
477             Args : none
478             Status : Public
479              
480             This is for GadFly compatibility, and returns the high coordinate in
481             absolute coordinates;
482              
483             =cut
484              
485             sub abs_high {
486 0     0 1 0 my $self = shift;
487 0         0 my ($a,$b) = ($self->abs_start,$self->abs_end);
488 0 0       0 return ($a>$b) ? $a : $b;
489             }
490              
491              
492             =head2 asString
493              
494             Title : asString
495             Usage : $s->asString
496             Function: human-readable representation of the segment
497             Returns : a string
498             Args : none
499             Status : Public
500              
501             This method will return a human-readable representation of the
502             segment. It is the overloaded method call for the "" operator.
503              
504             Currently the format is:
505              
506             refseq:start,stop
507              
508             =cut
509              
510             sub asString {
511 10     10 1 10 my $self = shift;
512 10 50       18 return $self->SUPER::asString if $self->absolute;
513 10         13 my $label = $self->{ref};
514 10   50     18 my $start = $self->start || '';
515 10   50     24 my $stop = $self->stop || '';
516 10 100 66     62 if (ref($label) && overload::StrVal($self) eq overload::StrVal($label->ref)) {
517 5         42 $label = $self->abs_ref;
518 5         10 $start = $self->abs_start;
519 5         15 $stop = $self->abs_end;
520             }
521 10         85 return "$label:$start,$stop";
522             }
523              
524             =head2 name
525              
526             Title : name
527             Usage : Alias for asString()
528              
529             =cut
530              
531 0     0 1 0 sub name { shift->asString }
532              
533             =head2 absolute
534              
535             Title : absolute
536             Usage : $abs = $s->absolute([$abs])
537             Function: get/set absolute coordinates
538             Returns : a boolean flag
539             Args : new setting for flag (optional)
540             Status : Public
541              
542             Called with a boolean flag, this method controls whether to display
543             relative coordinates (relative to the reference sequence) or absolute
544             coordinates (relative to the source sequence). It will return the
545             previous value of the setting.
546              
547             =cut
548              
549             sub absolute {
550 6467     6467 1 4033 my $self = shift;
551 6467         4347 my $g = $self->{absolute};
552 6467 100       7222 $self->{absolute} = shift if @_;
553 6467         7751 $g;
554             }
555              
556             =head2 features
557              
558             Title : features
559             Usage : @features = $s->features(@args)
560             Function: get features that overlap this segment
561             Returns : a list of Bio::DB::GFF::Feature objects
562             Args : see below
563             Status : Public
564              
565             This method will find all features that overlap the segment and return
566             a list of Bio::DB::GFF::Feature objects. The features will use
567             coordinates relative to the reference sequence in effect at the time
568             that features() was called.
569              
570             The returned list can be limited to certain types of feature by
571             filtering on their method and/or source. In addition, it is possible
572             to obtain an iterator that will step through a large number of
573             features sequentially.
574              
575             Arguments can be provided positionally or using the named arguments
576             format. In the former case, the arguments are a list of feature types
577             in the format "method:source". Either method or source can be
578             omitted, in which case the missing component is treated as a wildcard.
579             If no colon is present, then the type is treated as a method name.
580             Multiple arguments are ORed together.
581              
582             Examples:
583              
584             @f = $s->features('exon:curated'); # all curated exons
585             @f = $s->features('exon:curated','intron'); # curated exons and all introns
586             @f = $s->features('similarity:.*EST.*'); # all similarities
587             # having something to do
588             # with ESTs
589              
590             The named parameter form gives you control over a few options:
591              
592             -types an array reference to type names in the format
593             "method:source"
594              
595             -merge Whether to apply aggregators to the generated features (default yes)
596              
597             -rare Turn on an optimization suitable for a relatively rare feature type,
598             where it will be faster to filter by feature type first
599             and then by position, rather than vice versa.
600              
601             -attributes a hashref containing a set of attributes to match
602              
603             -range_type One of 'overlapping', 'contains', or 'contained_in'
604              
605             -iterator Whether to return an iterator across the features.
606              
607             -binsize A true value will create a set of artificial features whose
608             start and stop positions indicate bins of the given size, and
609             whose scores are the number of features in the bin. The
610             class and method of the feature will be set to "bin",
611             its source to "method:source", and its group to "bin:method:source".
612             This is a handy way of generating histograms of feature density.
613              
614             -merge is a boolean flag that controls whether the adaptor's
615             aggregators wll be applied to the features returned by this method.
616              
617             If -iterator is true, then the method returns a single scalar value
618             consisting of a Bio::SeqIO object. You can call next_seq() repeatedly
619             on this object to fetch each of the features in turn. If iterator is
620             false or absent, then all the features are returned as a list.
621              
622             The -attributes argument is a hashref containing one or more
623             attributes to match against:
624              
625             -attributes => { Gene => 'abc-1',
626             Note => 'confirmed' }
627              
628             Attribute matching is simple string matching, and multiple attributes
629             are ANDed together.
630              
631             =cut
632              
633             #'
634              
635             # return all features that overlap with this segment;
636             # optionally modified by a list of types to filter on
637             sub features {
638 85     85 1 3725 my $self = shift;
639 85         195 my @args = $self->_process_feature_args(@_);
640 85         234 return $self->factory->overlapping_features(@args);
641             }
642              
643             =head2 get_SeqFeatures
644              
645             Title : get_SeqFeatures
646             Usage :
647             Function: returns the top level sequence features
648             Returns : L objects
649             Args : none
650              
651             Segments do not ordinarily return any subfeatures.
652              
653             =cut
654              
655             # A SEGMENT DOES NOT HAVE SUBFEATURES!
656 0     0 1 0 sub get_SeqFeatures { return }
657              
658             =head2 feature_count
659              
660             Title : feature_count
661             Usage : $seq->feature_count()
662             Function: Return the number of SeqFeatures attached to a sequence
663             Returns : integer representing the number of SeqFeatures
664             Args : none
665              
666             This method comes through extension of Bio::FeatureHolderI. See
667             L for more information.
668              
669             =cut
670              
671             sub feature_count {
672 5     5 1 2106 my $self = shift;
673 5         10 my $ct = 0;
674 5         25 my %type_counts = $self->types(-enumerate=>1);
675 5         17 map { $ct += $_ } values %type_counts;
  30         28  
676 5         18 $ct;
677             }
678              
679             =head2 get_feature_stream
680              
681             Title : features
682             Usage : $stream = $s->get_feature_stream(@args)
683             Function: get a stream of features that overlap this segment
684             Returns : a Bio::SeqIO::Stream-compliant stream
685             Args : see below
686             Status : Public
687              
688             This is the same as features(), but returns a stream. Use like this:
689              
690             $stream = $s->get_feature_stream('exon');
691             while (my $exon = $stream->next_seq) {
692             print $exon->start,"\n";
693             }
694              
695             =cut
696              
697             sub get_feature_stream {
698 5     5 1 2459 my $self = shift;
699 5 50 33     60 my @args = defined($_[0]) && $_[0] =~ /^-/ ? (@_,-iterator=>1) : (-types=>\@_,-iterator=>1);
700 5         10 $self->features(@args);
701             }
702              
703             =head2 get_seq_stream
704              
705             Title : get_seq_stream
706             Usage : $stream = $s->get_seq_stream(@args)
707             Function: get a stream of features that overlap this segment
708             Returns : a Bio::SeqIO::Stream-compliant stream
709             Args : see below
710             Status : Public
711              
712             This is the same as feature_stream(), and is provided for Bioperl
713             compatibility. Use like this:
714              
715             $stream = $s->get_seq_stream('exon');
716             while (my $exon = $stream->next_seq) {
717             print $exon->start,"\n";
718             }
719              
720             =cut
721              
722             *get_seq_stream = \&get_feature_stream;
723              
724              
725             =head2 overlapping_features
726              
727             Title : overlapping_features
728             Usage : @features = $s->overlapping_features(@args)
729             Function: get features that overlap this segment
730             Returns : a list of Bio::DB::GFF::Feature objects
731             Args : see features()
732             Status : Public
733              
734             This is an alias for the features() method, and takes the same
735             arguments.
736              
737             =cut
738              
739             *overlapping_features = \&features;
740              
741             =head2 contained_features
742              
743             Title : contained_features
744             Usage : @features = $s->contained_features(@args)
745             Function: get features that are contained by this segment
746             Returns : a list of Bio::DB::GFF::Feature objects
747             Args : see features()
748             Status : Public
749              
750             This is identical in behavior to features() except that it returns
751             only those features that are completely contained within the segment,
752             rather than any that overlap.
753              
754             =cut
755              
756             # return all features completely contained within this segment
757             sub contained_features {
758 0     0 1 0 my $self = shift;
759 0         0 local $self->{whole} = 0;
760 0         0 my @args = $self->_process_feature_args(@_);
761 0         0 return $self->factory->contained_features(@args);
762             }
763              
764             # *contains = \&contained_features;
765              
766             =head2 contained_in
767              
768             Title : contained_in
769             Usage : @features = $s->contained_in(@args)
770             Function: get features that contain this segment
771             Returns : a list of Bio::DB::GFF::Feature objects
772             Args : see features()
773             Status : Public
774              
775             This is identical in behavior to features() except that it returns
776             only those features that completely contain the segment.
777              
778             =cut
779              
780             # return all features completely contained within this segment
781             sub contained_in {
782 0     0 1 0 my $self = shift;
783 0         0 local $self->{whole} = 0;
784 0         0 my @args = $self->_process_feature_args(@_);
785 0         0 return $self->factory->contained_in(@args);
786             }
787              
788             =head2 delete
789              
790             Title : delete
791             Usage : $db->delete(@args)
792             Function: delete features
793             Returns : count of features deleted -- if available
794             Args : numerous, see below
795             Status : public
796              
797             This method deletes all features that overlap the specified region or
798             are of a particular type. If no arguments are provided and the -force
799             argument is true, then deletes ALL features.
800              
801             Arguments:
802              
803             -type,-types Either a single scalar type to be deleted, or an
804             reference to an array of types.
805              
806             -range_type Control the range type of the deletion. One of "overlaps" (default)
807             "contains" or "contained_in"
808              
809             Examples:
810              
811             $segment->delete(-type=>['intron','repeat:repeatMasker']); # remove all introns & repeats
812             $segment->delete(-type=>['intron','repeat:repeatMasker']
813             -range_type => 'contains'); # remove all introns & repeats
814             # strictly contained in segment
815              
816             IMPORTANT NOTE: This method only deletes features. It does *NOT*
817             delete the names of groups that contain the deleted features. Group
818             IDs will be reused if you later load a feature with the same group
819             name as one that was previously deleted.
820              
821             NOTE ON FEATURE COUNTS: The DBI-based versions of this call return the
822             result code from the SQL DELETE operation. Some dbd drivers return the
823             count of rows deleted, while others return 0E0. Caveat emptor.
824              
825             =cut
826              
827             # return all features completely contained within this segment
828             sub delete {
829 5     5 1 50 my $self = shift;
830 5         42 my ($type,$range_type) =
831             rearrange([[qw(TYPE TYPES)],'RANGE_TYPE'],@_);
832 5         21 my $types = $self->factory->parse_types($type); # parse out list of types
833 5   50     13 $range_type ||= 'overlaps';
834 5         10 return $self->factory->_delete({
835             segments => [$self],
836             types => $types,
837             range_type => $range_type
838             });
839             }
840              
841             =head2 _process_feature_args
842              
843             Title : _process_feature_args
844             Usage : @args = $s->_process_feature_args(@args)
845             Function: preprocess arguments passed to features,
846             contained_features, and overlapping_features
847             Returns : a list of parsed arguents
848             Args : see feature()
849             Status : Internal
850              
851             This is an internal method that is used to check and format the
852             arguments to features() before passing them on to the adaptor.
853              
854             =cut
855              
856             sub _process_feature_args {
857 85     85   62 my $self = shift;
858              
859             my ($ref,$class,$start,$stop,$strand,$whole)
860 85         92 = @{$self}{qw(sourceseq class start stop strand whole)};
  85         180  
861              
862 85 100 66     378 ($start,$stop) = ($stop,$start) if defined $strand && $strand eq '-';
863              
864 85         226 my @args = (-ref=>$ref,-class=>$class);
865              
866             # indicating that we are fetching the whole segment allows certain
867             # SQL optimizations.
868 85 100       187 push @args,(-start=>$start,-stop=>$stop) unless $whole;
869              
870 85 100       146 if (@_) {
871 70 100       207 if ($_[0] =~ /^-/) {
872 45         69 push @args,@_;
873             } else {
874 25         37 my @types = @_;
875 25         75 push @args,-types=>\@types;
876             }
877             }
878 85         117 push @args,-parent=>$self;
879 85         279 @args;
880             }
881              
882             =head2 types
883              
884             Title : types
885             Usage : @types = $s->types([-enumerate=>1])
886             Function: list feature types that overlap this segment
887             Returns : a list of Bio::DB::GFF::Typename objects or a hash
888             Args : see below
889             Status : Public
890              
891             The types() method will return a list of Bio::DB::GFF::Typename
892             objects, each corresponding to a feature that overlaps the segment.
893             If the optional -enumerate parameter is set to a true value, then the
894             method will return a hash in which the keys are the type names and the
895             values are the number of times a feature of that type is present on
896             the segment. For example:
897              
898             %count = $s->types(-enumerate=>1);
899              
900             =cut
901              
902             # wrapper for lower-level types() call.
903             sub types {
904 15     15 1 1367 my $self = shift;
905 15         18 my ($ref,$class,$start,$stop,$strand) = @{$self}{qw(sourceseq class start stop strand)};
  15         35  
906 15 50       40 ($start,$stop) = ($stop,$start) if $strand eq '-';
907              
908 15         18 my @args;
909 15 50 66     92 if (@_ && $_[0] !~ /^-/) {
910 0         0 @args = (-type => \@_)
911             } else {
912 15         32 @args = @_;
913             }
914 15         31 $self->factory->types(-ref => $ref,
915             -start=> $start,
916             -stop => $stop,
917             @args);
918             }
919              
920             =head1 Internal Methods
921              
922             The following are internal methods and should not be called directly.
923              
924             =head2 new_from_segment
925              
926             Title : new_from_segment
927             Usage : $s = $segment->new_from_segment(@args)
928             Function: create a new relative segment
929             Returns : a new Bio::DB::GFF::RelSegment object
930             Args : see below
931             Status : Internal
932              
933             This constructor is used internally by the subseq() method. It forces
934             the new segment into the Bio::DB::GFF::RelSegment package, regardless
935             of the package that it is called from. This causes subclass-specfic
936             information, such as feature types, to be dropped when a subsequence
937             is created.
938              
939             =cut
940              
941             sub new_from_segment {
942 35     35 1 37 my $package = shift;
943 35 50       69 $package = ref $package if ref $package;
944 35         34 my $segment = shift;
945 35         39 my $new = {};
946 35         163 @{$new}{qw(factory sourceseq start stop strand class ref refstart refstrand)}
947 35         31 = @{$segment}{qw(factory sourceseq start stop strand class ref refstart refstrand)};
  35         66  
948 35         76 return bless $new,__PACKAGE__;
949             }
950              
951             =head2 _abs2rel
952              
953             Title : _abs2rel
954             Usage : @coords = $s->_abs2rel(@coords)
955             Function: convert absolute coordinates into relative coordinates
956             Returns : a list of relative coordinates
957             Args : a list of absolute coordinates
958             Status : Internal
959              
960             This is used internally to map from absolute to relative
961             coordinates. It does not take the offset of the reference sequence
962             into account, so please use abs2rel() instead.
963              
964             =cut
965              
966             sub _abs2rel {
967 2283     2283   1447 my $self = shift;
968 2283         1351 my @result;
969 2283 50       2649 return unless defined $_[0];
970              
971 2283 50       2352 if ($self->absolute) {
972 0         0 @result = @_;
973             } else {
974 2283         1398 my ($refstart,$refstrand) = @{$self}{qw(refstart refstrand)};
  2283         2802  
975 161         301 @result = defined($refstrand) && $refstrand eq '-' ? map { $refstart - $_ + 1 } @_
976 2283 100 100     6613 : map { $_ - $refstart + 1 } @_;
  2122         3386  
977             }
978             # if called with a single argument, caller will expect a single scalar reply
979             # not the size of the returned array!
980 2283 100 66     7981 return $result[0] if @result == 1 and !wantarray;
981 80         1013 @result;
982             }
983              
984             =head2 rel2abs
985              
986             Title : rel2abs
987             Usage : @coords = $s->rel2abs(@coords)
988             Function: convert relative coordinates into absolute coordinates
989             Returns : a list of absolute coordinates
990             Args : a list of relative coordinates
991             Status : Public
992              
993             This function takes a list of positions in relative coordinates to the
994             segment, and converts them into absolute coordinates.
995              
996             =cut
997              
998             sub rel2abs {
999 0     0 1 0 my $self = shift;
1000 0         0 my @result;
1001              
1002 0 0       0 if ($self->absolute) {
1003 0         0 @result = @_;
1004             } else {
1005 0         0 my ($abs_start,$abs_strand) = ($self->abs_start,$self->abs_strand);
1006 0         0 @result = $abs_strand < 0 ? map { $abs_start - $_ + 1 } @_
1007 0 0       0 : map { $_ + $abs_start - 1 } @_;
  0         0  
1008             }
1009             # if called with a single argument, caller will expect a single scalar reply
1010             # not the size of the returned array!
1011 0 0 0     0 return $result[0] if @result == 1 and !wantarray;
1012 0         0 @result;
1013             }
1014              
1015             =head2 abs2rel
1016              
1017             Title : abs2rel
1018             Usage : @rel_coords = $s->abs2rel(@abs_coords)
1019             Function: convert absolute coordinates into relative coordinates
1020             Returns : a list of relative coordinates
1021             Args : a list of absolute coordinates
1022             Status : Public
1023              
1024             This function takes a list of positions in absolute coordinates
1025             and returns a list expressed in relative coordinates.
1026              
1027             =cut
1028              
1029             sub abs2rel {
1030 0     0 1 0 my $self = shift;
1031 0         0 my @result;
1032              
1033 0 0       0 if ($self->absolute) {
1034 0         0 @result = @_;
1035             } else {
1036 0         0 my ($abs_start,$abs_strand) = ($self->abs_start,$self->abs_strand);
1037 0         0 @result = $abs_strand < 0 ? map { $abs_start - $_ + 1 } @_
1038 0 0       0 : map { $_ - $abs_start + 1 } @_;
  0         0  
1039             }
1040             # if called with a single argument, caller will expect a single scalar reply
1041             # not the size of the returned array!
1042 0 0 0     0 return $result[0] if @result == 1 and !wantarray;
1043 0         0 @result;
1044             }
1045              
1046             sub subseq {
1047 35     35 1 30 my $self = shift;
1048 35         90 my $obj = $self->SUPER::subseq(@_);
1049 35         94 bless $obj,__PACKAGE__; # always bless into the generic RelSegment package
1050             }
1051              
1052             sub strand {
1053 627     627 1 516 my $self = shift;
1054 627 100       670 if ($self->absolute) {
1055 10         25 return _to_strand($self->{strand});
1056             }
1057 617         773 my $start = $self->start;
1058 617         754 my $stop = $self->stop;
1059 617 50 33     1770 return 0 unless defined $start and defined $stop;
1060 617         2148 return $stop <=> $start;
1061             }
1062              
1063             sub _to_strand {
1064 10     10   10 my $s = shift;
1065 10 100       38 return -1 if $s eq '-';
1066 5 50       28 return +1 if $s eq '+';
1067 0           return 0;
1068             }
1069              
1070             =head2 Bio::RangeI Methods
1071              
1072             The following Bio::RangeI methods are supported:
1073              
1074             overlaps(), contains(), equals(),intersection(),union(),overlap_extent()
1075              
1076             =cut
1077              
1078             sub intersection {
1079 0     0 1   my $self = shift;
1080 0           my (@ranges) = @_;
1081 0 0         unshift @ranges,$self if ref $self;
1082 0 0         $ranges[0]->isa('Bio::DB::GFF::RelSegment')
1083             or return $self->SUPER::intersection(@_);
1084              
1085 0           my $ref = $ranges[0]->abs_ref;
1086 0           my ($low,$high);
1087 0           foreach (@ranges) {
1088 0 0         return unless $_->can('abs_ref');
1089 0 0         $ref eq $_->abs_ref or return;
1090 0 0 0       $low = $_->abs_low if !defined($low) or $low < $_->abs_low;
1091 0 0 0       $high = $_->abs_high if !defined($high) or $high > $_->abs_high;
1092             }
1093              
1094 0 0         return unless $low < $high;
1095 0           return Bio::DB::GFF::RelSegment->new(-factory => $self->factory,
1096             -seq => $ref,
1097             -start => $low,
1098             -stop => $high,
1099             );
1100             }
1101              
1102             sub overlaps {
1103 0     0 1   my $self = shift;
1104 0           my($other,$so) = @_;
1105 0 0         return $self->SUPER::overlaps(@_) unless $other->isa('Bio::DB::GFF::RelSegment');
1106 0 0         return if $self->abs_ref ne $other->abs_ref;
1107 0 0         return if $self->abs_low > $other->abs_high;
1108 0 0         return if $self->abs_high < $other->abs_low;
1109 0           1;
1110             }
1111              
1112             sub contains {
1113 0     0 1   my $self = shift;
1114 0           my($other,$so) = @_;
1115 0 0         return $self->SUPER::overlaps(@_) unless $other->isa('Bio::DB::GFF::RelSegment');
1116 0 0         return if $self->abs_ref ne $other->abs_ref;
1117 0 0         return unless $self->abs_low <= $other->abs_low;
1118 0 0         return unless $self->abs_high >= $other->abs_high;
1119 0           1;
1120             }
1121              
1122             sub union {
1123 0     0 1   my $self = shift;
1124 0           my (@ranges) = @_;
1125 0 0         unshift @ranges,$self if ref $self;
1126 0 0         $ranges[0]->isa('Bio::DB::GFF::RelSegment')
1127             or return $self->SUPER::union(@_);
1128              
1129 0           my $ref = $ranges[0]->abs_ref;
1130 0           my ($low,$high);
1131 0           foreach (@ranges) {
1132 0 0         return unless $_->can('abs_ref');
1133 0 0         $ref eq $_->abs_ref or return;
1134 0 0 0       $low = $_->abs_low if !defined($low) or $low > $_->abs_low;
1135 0 0 0       $high = $_->abs_high if !defined($high) or $high < $_->abs_high;
1136             }
1137 0           $self->new(-factory=> $self->factory,
1138             -seq => $ref,
1139             -start => $low,
1140             -stop => $high);
1141             }
1142              
1143 0     0 0   sub version { 0 }
1144              
1145              
1146             1;
1147              
1148             __END__