File Coverage

Bio/RangeI.pm
Criterion Covered Total %
statement 168 207 81.1
branch 79 138 57.2
condition 24 39 61.5
subroutine 23 23 100.0
pod 13 13 100.0
total 307 420 73.1


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::RangeI
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Lehvaslaiho
7             #
8             # Copyright Matthew Pocock
9             #
10             # You may distribute this module under the same terms as perl itself
11             #
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::RangeI - Range interface
17              
18             =head1 SYNOPSIS
19              
20             #Do not run this module directly
21              
22             =head1 DESCRIPTION
23              
24             This provides a standard BioPerl range interface that should be
25             implemented by any object that wants to be treated as a range. This
26             serves purely as an abstract base class for implementers and can not
27             be instantiated.
28              
29             Ranges are modeled as having (start, end, length, strand). They use
30             Bio-coordinates - all points E= start and E= end are within the
31             range. End is always greater-than or equal-to start, and length is
32             greater than or equal to 1. The behaviour of a range is undefined if
33             ranges with negative numbers or zero are used.
34              
35             So, in summary:
36              
37             length = end - start + 1
38             end >= start
39             strand = (-1 | 0 | +1)
40              
41             =head1 FEEDBACK
42              
43             =head2 Mailing Lists
44              
45             User feedback is an integral part of the evolution of this and other
46             Bioperl modules. Send your comments and suggestions preferably to one
47             of the Bioperl mailing lists. Your participation is much appreciated.
48              
49             bioperl-l@bioperl.org - General discussion
50             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
51              
52             =head2 Support
53              
54             Please direct usage questions or support issues to the mailing list:
55              
56             I
57              
58             rather than to the module maintainer directly. Many experienced and
59             reponsive experts will be able look at the problem and quickly
60             address it. Please include a thorough description of the problem
61             with code and data examples if at all possible.
62              
63             =head2 Reporting Bugs
64              
65             Report bugs to the Bioperl bug tracking system to help us keep track
66             the bugs and their resolution. Bug reports can be submitted via the
67             web:
68              
69             https://github.com/bioperl/bioperl-live/issues
70              
71             =head1 AUTHOR - Heikki Lehvaslaiho
72              
73             Email: heikki-at-bioperl-dot-org
74              
75             =head1 CONTRIBUTORS
76              
77             Juha Muilu (muilu@ebi.ac.uk)
78             Sendu Bala (bix@sendu.me.uk)
79             Malcolm Cook (mec@stowers-institute.org)
80             Stephen Montgomery (sm8 at sanger.ac.uk)
81              
82             =head1 APPENDIX
83              
84             The rest of the documentation details each of the object
85             methods. Internal methods are usually preceded with a _
86              
87             =cut
88              
89             package Bio::RangeI;
90              
91 217     217   1341 use strict;
  217         361  
  217         5226  
92 217     217   893 use Carp;
  217         358  
  217         9013  
93 217     217   1065 use integer;
  217         445  
  217         1205  
94 217     217   4863 use vars qw(%STRAND_OPTIONS);
  217         397  
  217         8206  
95              
96 217     217   1048 use base qw(Bio::Root::RootI);
  217         400  
  217         18707  
97              
98             BEGIN {
99             # STRAND_OPTIONS contains the legal values for the strand-testing options
100 217     217   712 %STRAND_OPTIONS = map { $_, '_' . $_ }
  651         379508  
101             (
102             'strong', # ranges must have the same strand
103             'weak', # ranges must have the same strand or no strand
104             'ignore', # ignore strand information
105             );
106             }
107              
108             # utility methods
109             #
110              
111             # returns true if strands are equal and non-zero
112             sub _strong {
113 8     8   15 my ($r1, $r2) = @_;
114 8         20 my ($s1, $s2) = ($r1->strand(), $r2->strand());
115              
116 8 100 100     70 return 1 if $s1 != 0 && $s1 == $s2;
117             }
118              
119             # returns true if strands are equal or either is zero
120             sub _weak {
121 8     8   13 my ($r1, $r2) = @_;
122 8         15 my ($s1, $s2) = ($r1->strand(), $r2->strand());
123 8 100 100     93 return 1 if $s1 == 0 || $s2 == 0 || $s1 == $s2;
      100        
124             }
125              
126             # returns true for any strandedness
127             sub _ignore {
128 1     1   376 return 1;
129             }
130              
131             # works out what test to use for the strictness and returns true/false
132             # e.g. $r1->_testStrand($r2, 'strong')
133             sub _testStrand() {
134 12981     12981   15669 my ($r1, $r2, $comp) = @_;
135 12981 100       30979 return 1 unless $comp;
136 10         20 my $func = $STRAND_OPTIONS{$comp};
137 10         35 return $r1->$func($r2);
138             }
139              
140             =head1 Abstract methods
141              
142             These methods must be implemented in all subclasses.
143              
144             =head2 start
145              
146             Title : start
147             Usage : $start = $range->start();
148             Function: get/set the start of this range
149             Returns : the start of this range
150             Args : optionally allows the start to be set
151             using $range->start($start)
152              
153             =cut
154              
155             sub start {
156 1     1 1 597 shift->throw_not_implemented();
157             }
158              
159             =head2 end
160              
161             Title : end
162             Usage : $end = $range->end();
163             Function: get/set the end of this range
164             Returns : the end of this range
165             Args : optionally allows the end to be set
166             using $range->end($end)
167              
168             =cut
169              
170             sub end {
171 1     1 1 419 shift->throw_not_implemented();
172             }
173              
174             =head2 length
175              
176             Title : length
177             Usage : $length = $range->length();
178             Function: get/set the length of this range
179             Returns : the length of this range
180             Args : optionally allows the length to be set
181             using $range->length($length)
182              
183             =cut
184              
185             sub length {
186 1     1 1 488 shift->throw_not_implemented();
187             }
188              
189             =head2 strand
190              
191             Title : strand
192             Usage : $strand = $range->strand();
193             Function: get/set the strand of this range
194             Returns : the strandedness (-1, 0, +1)
195             Args : optionally allows the strand to be set
196             using $range->strand($strand)
197              
198             =cut
199              
200             sub strand {
201 1     1 1 437 shift->throw_not_implemented();
202             }
203              
204             =head1 Boolean Methods
205              
206             These methods return true or false. They throw an error if start and
207             end are not defined.
208              
209             $range->overlaps($otherRange) && print "Ranges overlap\n";
210              
211             =head2 overlaps
212              
213             Title : overlaps
214             Usage : if($r1->overlaps($r2)) { do stuff }
215             Function: tests if $r2 overlaps $r1
216             Args : arg #1 = a range to compare this one to (mandatory)
217             arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
218             Returns : true if the ranges overlap, false otherwise
219              
220             =cut
221              
222             sub overlaps {
223 45     45 1 542 my ($self, $other, $so) = @_;
224              
225 45 50       110 $self->throw("start is undefined") unless defined $self->start;
226 44 50       87 $self->throw("end is undefined") unless defined $self->end;
227 44 50 33     186 $self->throw("not a Bio::RangeI object") unless defined $other &&
228             $other->isa('Bio::RangeI');
229 44 50       73 $other->throw("start is undefined") unless defined $other->start;
230 44 50       75 $other->throw("end is undefined") unless defined $other->end;
231              
232             return
233 44   100     102 ($self->_testStrand($other, $so)
234             and not (
235             ($self->start() > $other->end() or
236             $self->end() < $other->start() )
237             ));
238             }
239              
240             =head2 contains
241              
242             Title : contains
243             Usage : if($r1->contains($r2) { do stuff }
244             Function: tests whether $r1 totally contains $r2
245             Args : arg #1 = a range to compare this one to (mandatory)
246             alternatively, integer scalar to test
247             arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
248             Returns : true if the argument is totally contained within this range
249              
250             =cut
251              
252             sub contains {
253 8289     8289 1 10285 my ($self, $other, $so) = @_;
254 8289 50       11075 $self->throw("start is undefined") unless defined $self->start;
255 8288 50       12188 $self->throw("end is undefined") unless defined $self->end;
256              
257 8288 50 33     21848 if(defined $other && ref $other) { # a range object?
258 8288 50       17841 $other->throw("Not a Bio::RangeI object: $other") unless $other->isa('Bio::RangeI');
259 8288 50       11277 $other->throw("start is undefined") unless defined $other->start;
260 8288 50       12876 $other->throw("end is undefined") unless defined $other->end;
261              
262 8288   100     13440 return ($self->_testStrand($other, $so) and
263             $other->start() >= $self->start() and
264             $other->end() <= $self->end());
265             } else { # a scalar?
266 0 0       0 $self->throw("'$other' is not an integer.\n") unless $other =~ /^[-+]?\d+$/;
267 0   0     0 return ($other >= $self->start() and $other <= $self->end());
268             }
269             }
270              
271             =head2 equals
272              
273             Title : equals
274             Usage : if($r1->equals($r2))
275             Function: test whether $r1 has the same start, end, length as $r2
276             Args : arg #1 = a range to compare this one to (mandatory)
277             arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
278             Returns : true if they are describing the same range
279              
280             =cut
281              
282             sub equals {
283 1     1 1 509 my ($self, $other, $so) = @_;
284              
285 1 0       7 $self->throw("start is undefined") unless defined $self->start;
286 0 0       0 $self->throw("end is undefined") unless defined $self->end;
287 0 0       0 $other->throw("Not a Bio::RangeI object") unless $other->isa('Bio::RangeI');
288 0 0       0 $other->throw("start is undefined") unless defined $other->start;
289 0 0       0 $other->throw("end is undefined") unless defined $other->end;
290              
291 0   0     0 return ($self->_testStrand($other, $so) and
292             $self->start() == $other->start() and
293             $self->end() == $other->end() );
294             }
295              
296             =head1 Geometrical methods
297              
298             These methods do things to the geometry of ranges, and return
299             Bio::RangeI compliant objects or triplets (start, stop, strand) from
300             which new ranges could be built.
301              
302             =head2 intersection
303              
304             Title : intersection
305             Usage : ($start, $end, $strand) = $r1->intersection($r2); OR
306             ($start, $end, $strand) = Bio::Range->intersection(\@ranges); OR
307             my $containing_range = $r1->intersection($r2); OR
308             my $containing_range = Bio::Range->intersection(\@ranges);
309             Function: gives the range that is contained by all ranges
310             Returns : undef if they do not overlap or if @ranges has only a
311             single range, else returns the range that they do
312             overlap. In scalar contex, the return value is an object of
313             the same class as the calling one. In array context the
314             return value is a three element array.
315             Args : arg #1 = [REQUIRED] a Bio::RangeI to compare this one to,
316             or an array ref of ranges
317             arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
318              
319             =cut
320              
321             sub intersection {
322 4438     4438 1 6807 my ($self, $given, $so) = @_;
323 4438 100       6821 $self->throw("missing arg: you need to pass in another feature") unless $given;
324              
325 4437         3930 my @ranges;
326 4437 50       8004 if ($self eq "Bio::RangeI") {
327 0         0 $self = "Bio::Range";
328 0         0 $self->warn("calling static methods of an interface is deprecated; use $self instead");
329             }
330 4437 100       6626 if (ref $self) {
331 4436         5166 push(@ranges, $self);
332             }
333 4437 100       6638 ref($given) eq 'ARRAY' ? push(@ranges, @{$given}) : push(@ranges, $given);
  3         5  
334             #$self->throw("Need at least 2 ranges") unless @ranges >= 2;
335             # Rather than the above, I think the following is more consistent
336 4437 50       6249 return undef unless @ranges >= 2;
337              
338 4437         4455 my $intersect;
339 4437         6270 while (@ranges > 0) {
340 4440 100       6784 unless ($intersect) {
341 4437         4592 $intersect = shift(@ranges);
342 4437 50       6736 $self->throw("Not an object: $intersect") unless ref($intersect);
343 4437 50       9673 $self->throw("Not a Bio::RangeI object: $intersect") unless $intersect->isa('Bio::RangeI');
344 4437 50       6361 $self->throw("start is undefined") unless defined $intersect->start;
345 4437 50       7426 $self->throw("end is undefined") unless defined $intersect->end;
346             }
347              
348 4440         5016 my $compare = shift(@ranges);
349 4440 50       6686 $self->throw("Not an object: $compare") unless ref($compare);
350 4440 50       9322 $self->throw("Not a Bio::RangeI object: $compare") unless $compare->isa('Bio::RangeI');
351 4440 50       6441 $self->throw("start is undefined") unless defined $compare->start;
352 4440 50       6916 $self->throw("end is undefined") unless defined $compare->end;
353 4440 50       7466 return unless $compare->_testStrand($intersect, $so);
354              
355 4440         7313 my @starts = sort {$a <=> $b} ($intersect->start(), $compare->start());
  4440         12356  
356 4440         8056 my @ends = sort {$a <=> $b} ($intersect->end(), $compare->end());
  4440         10347  
357              
358 4440         6037 my $start = pop @starts; # larger of the 2 starts
359 4440         4640 my $end = shift @ends; # smaller of the 2 ends
360              
361 4440         4220 my $intersect_strand; # strand for the intersection
362 4440 100 33     6872 if (defined($intersect->strand) && defined($compare->strand) && $intersect->strand == $compare->strand) {
      66        
363 4284         6164 $intersect_strand = $compare->strand;
364             }
365             else {
366 156         192 $intersect_strand = 0;
367             }
368              
369 4440 100       7726 if ($start > $end) {
370 109         238 return;
371             }
372             else {
373 4331         9875 $intersect = $self->new(-start => $start,
374             -end => $end,
375             -strand => $intersect_strand);
376             }
377             }
378              
379 4328 100       6701 if (wantarray()) {
380 4         6 return ($intersect->start, $intersect->end, $intersect->strand);
381             }
382             else {
383 4324         6918 return $intersect;
384             }
385             }
386              
387             =head2 union
388              
389             Title : union
390             Usage : ($start, $end, $strand) = $r1->union($r2);
391             : ($start, $end, $strand) = Bio::Range->union(@ranges);
392             my $newrange = Bio::Range->union(@ranges);
393             Function: finds the minimal Range that contains all of the Ranges
394             Args : a Range or list of Range objects
395              
396             Returns : the range containing all of the range. In scalar contex,
397             the return value is an object of the same class as the
398             calling one. In array context the return value is a
399             three element array.
400              
401             =cut
402              
403             sub union {
404 21337     21337 1 22142 my $self = shift;
405 21337         25580 my @ranges = @_;
406 21337 50       34366 if ($self eq "Bio::RangeI") {
407 0         0 $self = "Bio::Range";
408 0         0 $self->warn("calling static methods of an interface is deprecated; use $self instead");
409             }
410 21337 100       30484 if(ref $self) {
411 21336         26781 unshift @ranges, $self;
412             }
413              
414 21341         54254 my @start = sort {$a<=>$b}
415 21337         25463 map( { $_->start() } @ranges);
  42676         55104  
416 21341         45430 my @end = sort {$a<=>$b}
417 21337         28790 map( { $_->end() } @ranges);
  42676         57311  
418              
419 21337         26364 my $start = shift @start;
420 21337         32005 while( !defined $start ) {
421 0         0 $start = shift @start;
422             }
423              
424 21337         20849 my $end = pop @end;
425              
426 21337         19118 my $union_strand; # Strand for the union range object.
427              
428 21337         26507 foreach(@ranges) {
429 42676 100       49536 if(! defined $union_strand) {
430 21354         29865 $union_strand = $_->strand;
431 21354         29614 next;
432             } else {
433 21322 100 100     28618 if(not defined $_->strand or $union_strand ne $_->strand) {
434 467         511 $union_strand = 0;
435 467         524 last;
436             }
437             }
438             }
439 21337 50 33     34556 return unless $start or $end;
440 21337 100       25972 if( wantarray() ) {
441 17083         43629 return ( $start,$end,$union_strand);
442             } else {
443 4254         7484 return $self->new('-start' => $start,
444             '-end' => $end,
445             '-strand' => $union_strand
446             );
447             }
448             }
449              
450             =head2 overlap_extent
451              
452             Title : overlap_extent
453             Usage : ($a_unique,$common,$b_unique) = $a->overlap_extent($b)
454             Function: Provides actual amount of overlap between two different
455             ranges
456             Example :
457             Returns : array of values containing the length unique to the calling
458             range, the length common to both, and the length unique to
459             the argument range
460             Args : a range
461              
462             =cut
463              
464             sub overlap_extent{
465 1     1 1 742 my ($a,$b) = @_;
466              
467 1 0       8 $a->throw("start is undefined") unless defined $a->start;
468 0 0       0 $a->throw("end is undefined") unless defined $a->end;
469 0 0       0 $b->throw("Not a Bio::RangeI object") unless $b->isa('Bio::RangeI');
470 0 0       0 $b->throw("start is undefined") unless defined $b->start;
471 0 0       0 $b->throw("end is undefined") unless defined $b->end;
472              
473 0 0       0 if( ! $a->overlaps($b) ) {
474 0         0 return ($a->length,0,$b->length);
475             }
476              
477 0         0 my ($au,$bu) = (0, 0);
478 0 0       0 if( $a->start < $b->start ) {
479 0         0 $au = $b->start - $a->start;
480             } else {
481 0         0 $bu = $a->start - $b->start;
482             }
483              
484 0 0       0 if( $a->end > $b->end ) {
485 0         0 $au += $a->end - $b->end;
486             } else {
487 0         0 $bu += $b->end - $a->end;
488             }
489              
490 0         0 my $intersect = $a->intersection($b);
491 0 0       0 if( ! $intersect ) {
492 0         0 warn("no intersection\n");
493 0         0 return ($au, 0, $bu);
494             } else {
495 0         0 my $ie = $intersect->end;
496 0         0 my $is = $intersect->start;
497 0         0 return ($au,$ie-$is+1,$bu);
498             }
499             }
500              
501             =head2 disconnected_ranges
502              
503             Title : disconnected_ranges
504             Usage : my @disc_ranges = Bio::Range->disconnected_ranges(@ranges);
505             Function: finds the minimal set of ranges such that each input range
506             is fully contained by at least one output range, and none of
507             the output ranges overlap
508             Args : a list of ranges
509             Returns : a list of objects of the same type as the input
510             (conforms to RangeI)
511              
512             =cut
513              
514             sub disconnected_ranges {
515 2080     2080 1 2462 my $self = shift;
516 2080 50       3351 if ($self eq "Bio::RangeI") {
517 0         0 $self = "Bio::Range";
518 0         0 $self->warn("calling static methods of an interface is deprecated; use $self instead");
519             }
520 2080         2979 my @inranges = @_;
521 2080 50       2984 if(ref $self) {
522 0         0 unshift @inranges, $self;
523             }
524              
525 2080         2029 my @outranges = (); # disconnected ranges
526              
527             # iterate through all input ranges $inrange,
528             # adding each input range to the set of output ranges @outranges,
529             # provided $inrange does not overlap ANY range in @outranges
530             # - if it does overlap an outrange, then merge it
531 2080         2552 foreach my $inrange (@inranges) {
532 6333         6384 my $intersects = 0;
533 6333         6530 my @outranges_new = ();
534 6333         5457 my @intersecting_ranges = ();
535              
536             # iterate through all @outranges, testing if it intersects
537             # current $inrange; if it does, merge and add to list
538             # of @intersecting_ranges, otherwise add $outrange to
539             # the new list of outranges that do NOT intersect
540 6333         10023 for (my $i=0; $i<@outranges; $i++) {
541 4289         4815 my $outrange = $outranges[$i];
542 4289         6367 my $intersection = $inrange->intersection($outrange);
543 4289 100       5832 if ($intersection) {
544 4250         4372 $intersects = 1;
545 4250         6403 my $union = $inrange->union($outrange);
546 4250         11155 push(@intersecting_ranges, $union);
547             }
548             else {
549 39         77 push(@outranges_new, $outrange);
550             }
551             }
552 6333         10726 @outranges = @outranges_new;
553             # @outranges now contains a list of non-overlapping ranges
554             # that do not intersect the current $inrange
555              
556 6333 100       9351 if (@intersecting_ranges) {
557 4250 50       6051 if (@intersecting_ranges > 1) {
558             # this sf intersected > 1 range, which means that
559             # all the ranges it intersects should be joined
560             # together in a new range
561 0         0 my $merged_range =
562             $self->union(@intersecting_ranges);
563 0         0 push(@outranges, $merged_range);
564              
565             }
566             else {
567             # exactly 1 intersecting range
568 4250         7465 push(@outranges, @intersecting_ranges);
569             }
570             }
571             else {
572             # no intersections found - new range
573 2083         3862 push(@outranges,
574             $self->new('-start'=>$inrange->start,
575             '-end'=>$inrange->end,
576             '-strand'=>$inrange->strand,
577             ));
578             }
579             }
580 2080         4328 return @outranges;
581             }
582              
583             =head2 offsetStranded
584              
585             Title : offsetStranded
586             Usage : $rnge->ofsetStranded($fiveprime_offset, $threeprime_offset)
587             Function : destructively modifies RangeI implementing object to
588             offset its start and stop coordinates by values $fiveprime_offset and
589             $threeprime_offset (positive values being in the strand direction).
590             Args : two integer offsets: $fiveprime_offset and $threeprime_offset
591             Returns : $self, offset accordingly.
592              
593             =cut
594              
595             sub offsetStranded {
596 5     5 1 650 my ($self, $offset_fiveprime, $offset_threeprime) = @_;
597 5 100       20 my ($offset_start, $offset_end) = $self->strand() eq -1 ? (- $offset_threeprime, - $offset_fiveprime) : ($offset_fiveprime, $offset_threeprime);
598 4         10 $self->start($self->start + $offset_start);
599 4         8 $self->end($self->end + $offset_end);
600 4         13 return $self;
601             };
602              
603             =head2 subtract
604              
605             Title : subtract
606             Usage : my @subtracted = $r1->subtract($r2)
607             Function: Subtract range r2 from range r1
608             Args : arg #1 = a range to subtract from this one (mandatory)
609             arg #2 = strand option ('strong', 'weak', 'ignore') (optional)
610             Returns : undef if they do not overlap or r2 contains this RangeI,
611             or an arrayref of Range objects (this is an array since some
612             instances where the subtract range is enclosed within this range
613             will result in the creation of two new disjoint ranges)
614              
615             =cut
616              
617             sub subtract() {
618 7     7 1 1247 my ($self, $range, $so) = @_;
619 7 100       21 $self->throw("missing arg: you need to pass in another feature")
620             unless $range;
621 6 100       15 return unless $self->_testStrand($range, $so);
622              
623 4 50       12 if ($self eq "Bio::RangeI") {
624 0         0 $self = "Bio::Range";
625 0         0 $self->warn("calling static methods of an interface is
626             deprecated; use $self instead");
627             }
628 4 50       16 $range->throw("Input a Bio::RangeI object") unless
629             $range->isa('Bio::RangeI');
630              
631 4         4 my @sub_locations;
632 4 100       6 if ($self->location->isa('Bio::Location::SplitLocationI') ) {
633 1         3 @sub_locations = $self->location->sub_Location;
634             } else {
635 3         7 @sub_locations = $self;
636             }
637              
638 4         5 my @outranges;
639 4         6 foreach my $sl (@sub_locations) {
640 6 100       17 if (!$sl->overlaps($range)) {
641 1         3 push(@outranges,
642             $self->new('-start' =>$sl->start,
643             '-end' =>$sl->end,
644             '-strand'=>$sl->strand,
645             ));
646 1         3 next;
647             }
648            
649             ##Subtracts everything
650 5 100       14 if ($range->contains($sl)) {
651 1         2 next;
652             }
653            
654 4         16 my ($start, $end, $strand) = $sl->intersection($range, $so);
655             ##Subtract intersection from $self range
656            
657 4 100       9 if ($sl->start < $start) {
658 3         7 push(@outranges,
659             $self->new('-start' =>$sl->start,
660             '-end' =>$start - 1,
661             '-strand'=>$sl->strand,
662             ));
663             }
664 4 100       9 if ($sl->end > $end) {
665 2         9 push(@outranges,
666             $self->new('-start' =>$end + 1,
667             '-end' =>$sl->end,
668             '-strand'=>$sl->strand,
669             ));
670             }
671             }
672              
673 4 100       10 if (@outranges) {
674 3         9 return \@outranges;
675             }
676 1         3 return;
677             }
678              
679             1;