File Coverage

Bio/RangeI.pm
Criterion Covered Total %
statement 168 207 81.1
branch 79 138 57.2
condition 24 39 61.5
subroutine 23 23 100.0
pod 13 13 100.0
total 307 420 73.1


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::RangeI
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Lehvaslaiho
7             #
8             # Copyright Matthew Pocock
9             #
10             # You may distribute this module under the same terms as perl itself
11             #
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::RangeI - Range interface
17              
18             =head1 SYNOPSIS
19              
20             #Do not run this module directly
21              
22             =head1 DESCRIPTION
23              
24             This provides a standard BioPerl range interface that should be
25             implemented by any object that wants to be treated as a range. This
26             serves purely as an abstract base class for implementers and can not
27             be instantiated.
28              
29             Ranges are modeled as having (start, end, length, strand). They use
30             Bio-coordinates - all points E= start and E= end are within the
31             range. End is always greater-than or equal-to start, and length is
32             greater than or equal to 1. The behaviour of a range is undefined if
33             ranges with negative numbers or zero are used.
34              
35             So, in summary:
36              
37             length = end - start + 1
38             end >= start
39             strand = (-1 | 0 | +1)
40              
41             =head1 FEEDBACK
42              
43             =head2 Mailing Lists
44              
45             User feedback is an integral part of the evolution of this and other
46             Bioperl modules. Send your comments and suggestions preferably to one
47             of the Bioperl mailing lists. Your participation is much appreciated.
48              
49             bioperl-l@bioperl.org - General discussion
50             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
51              
52             =head2 Support
53              
54             Please direct usage questions or support issues to the mailing list:
55              
56             I
57              
58             rather than to the module maintainer directly. Many experienced and
59             reponsive experts will be able look at the problem and quickly
60             address it. Please include a thorough description of the problem
61             with code and data examples if at all possible.
62              
63             =head2 Reporting Bugs
64              
65             Report bugs to the Bioperl bug tracking system to help us keep track
66             the bugs and their resolution. Bug reports can be submitted via the
67             web:
68              
69             https://github.com/bioperl/bioperl-live/issues
70              
71             =head1 AUTHOR - Heikki Lehvaslaiho
72              
73             Email: heikki-at-bioperl-dot-org
74              
75             =head1 CONTRIBUTORS
76              
77             Juha Muilu (muilu@ebi.ac.uk)
78             Sendu Bala (bix@sendu.me.uk)
79             Malcolm Cook (mec@stowers-institute.org)
80             Stephen Montgomery (sm8 at sanger.ac.uk)
81              
82             =head1 APPENDIX
83              
84             The rest of the documentation details each of the object
85             methods. Internal methods are usually preceded with a _
86              
87             =cut
88              
89             package Bio::RangeI;
90              
91 217     217   938 use strict;
  217         310  
  217         4765  
92 217     217   648 use Carp;
  217         273  
  217         8479  
93 217     217   692 use integer;
  217         239  
  217         1055  
94 217     217   3650 use vars qw(%STRAND_OPTIONS);
  217         241  
  217         6962  
95              
96 217     217   749 use base qw(Bio::Root::RootI);
  217         1004  
  217         16797  
97              
98             BEGIN {
99             # STRAND_OPTIONS contains the legal values for the strand-testing options
100 217     217   376 %STRAND_OPTIONS = map { $_, '_' . $_ }
  651         319633  
101             (
102             'strong', # ranges must have the same strand
103             'weak', # ranges must have the same strand or no strand
104             'ignore', # ignore strand information
105             );
106             }
107              
108             # utility methods
109             #
110              
111             # returns true if strands are equal and non-zero
112             sub _strong {
113 8     8   8 my ($r1, $r2) = @_;
114 8         15 my ($s1, $s2) = ($r1->strand(), $r2->strand());
115              
116 8 100 100     52 return 1 if $s1 != 0 && $s1 == $s2;
117             }
118              
119             # returns true if strands are equal or either is zero
120             sub _weak {
121 8     8   9 my ($r1, $r2) = @_;
122 8         16 my ($s1, $s2) = ($r1->strand(), $r2->strand());
123 8 100 100     60 return 1 if $s1 == 0 || $s2 == 0 || $s1 == $s2;
      100        
124             }
125              
126             # returns true for any strandedness
127             sub _ignore {
128 1     1   292 return 1;
129             }
130              
131             # works out what test to use for the strictness and returns true/false
132             # e.g. $r1->_testStrand($r2, 'strong')
133             sub _testStrand() {
134 12981     12981   12301 my ($r1, $r2, $comp) = @_;
135 12981 100       38751 return 1 unless $comp;
136 10         13 my $func = $STRAND_OPTIONS{$comp};
137 10         27 return $r1->$func($r2);
138             }
139              
140             =head1 Abstract methods
141              
142             These methods must be implemented in all subclasses.
143              
144             =head2 start
145              
146             Title : start
147             Usage : $start = $range->start();
148             Function: get/set the start of this range
149             Returns : the start of this range
150             Args : optionally allows the start to be set
151             using $range->start($start)
152              
153             =cut
154              
155             sub start {
156 1     1 1 641 shift->throw_not_implemented();
157             }
158              
159             =head2 end
160              
161             Title : end
162             Usage : $end = $range->end();
163             Function: get/set the end of this range
164             Returns : the end of this range
165             Args : optionally allows the end to be set
166             using $range->end($end)
167              
168             =cut
169              
170             sub end {
171 1     1 1 532 shift->throw_not_implemented();
172             }
173              
174             =head2 length
175              
176             Title : length
177             Usage : $length = $range->length();
178             Function: get/set the length of this range
179             Returns : the length of this range
180             Args : optionally allows the length to be set
181             using $range->length($length)
182              
183             =cut
184              
185             sub length {
186 1     1 1 508 shift->throw_not_implemented();
187             }
188              
189             =head2 strand
190              
191             Title : strand
192             Usage : $strand = $range->strand();
193             Function: get/set the strand of this range
194             Returns : the strandedness (-1, 0, +1)
195             Args : optionally allows the strand to be set
196             using $range->strand($strand)
197              
198             =cut
199              
200             sub strand {
201 1     1 1 521 shift->throw_not_implemented();
202             }
203              
204             =head1 Boolean Methods
205              
206             These methods return true or false. They throw an error if start and
207             end are not defined.
208              
209             $range->overlaps($otherRange) && print "Ranges overlap\n";
210              
211             =head2 overlaps
212              
213             Title : overlaps
214             Usage : if($r1->overlaps($r2)) { do stuff }
215             Function: tests if $r2 overlaps $r1
216             Args : arg #1 = a range to compare this one to (mandatory)
217             arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
218             Returns : true if the ranges overlap, false otherwise
219              
220             =cut
221              
222             sub overlaps {
223 45     45 1 535 my ($self, $other, $so) = @_;
224              
225 45 50       81 $self->throw("start is undefined") unless defined $self->start;
226 44 50       71 $self->throw("end is undefined") unless defined $self->end;
227 44 50 33     208 $self->throw("not a Bio::RangeI object") unless defined $other &&
228             $other->isa('Bio::RangeI');
229 44 50       64 $other->throw("start is undefined") unless defined $other->start;
230 44 50       66 $other->throw("end is undefined") unless defined $other->end;
231              
232             return
233 44   100     86 ($self->_testStrand($other, $so)
234             and not (
235             ($self->start() > $other->end() or
236             $self->end() < $other->start() )
237             ));
238             }
239              
240             =head2 contains
241              
242             Title : contains
243             Usage : if($r1->contains($r2) { do stuff }
244             Function: tests whether $r1 totally contains $r2
245             Args : arg #1 = a range to compare this one to (mandatory)
246             alternatively, integer scalar to test
247             arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
248             Returns : true if the argument is totally contained within this range
249              
250             =cut
251              
252             sub contains {
253 8289     8289 1 7190 my ($self, $other, $so) = @_;
254 8289 50       11528 $self->throw("start is undefined") unless defined $self->start;
255 8288 50       11546 $self->throw("end is undefined") unless defined $self->end;
256              
257 8288 50 33     23247 if(defined $other && ref $other) { # a range object?
258 8288 50       19931 $other->throw("Not a Bio::RangeI object: $other") unless $other->isa('Bio::RangeI');
259 8288 50       10478 $other->throw("start is undefined") unless defined $other->start;
260 8288 50       12318 $other->throw("end is undefined") unless defined $other->end;
261              
262 8288   100     13053 return ($self->_testStrand($other, $so) and
263             $other->start() >= $self->start() and
264             $other->end() <= $self->end());
265             } else { # a scalar?
266 0 0       0 $self->throw("'$other' is not an integer.\n") unless $other =~ /^[-+]?\d+$/;
267 0   0     0 return ($other >= $self->start() and $other <= $self->end());
268             }
269             }
270              
271             =head2 equals
272              
273             Title : equals
274             Usage : if($r1->equals($r2))
275             Function: test whether $r1 has the same start, end, length as $r2
276             Args : arg #1 = a range to compare this one to (mandatory)
277             arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
278             Returns : true if they are describing the same range
279              
280             =cut
281              
282             sub equals {
283 1     1 1 474 my ($self, $other, $so) = @_;
284              
285 1 0       7 $self->throw("start is undefined") unless defined $self->start;
286 0 0       0 $self->throw("end is undefined") unless defined $self->end;
287 0 0       0 $other->throw("Not a Bio::RangeI object") unless $other->isa('Bio::RangeI');
288 0 0       0 $other->throw("start is undefined") unless defined $other->start;
289 0 0       0 $other->throw("end is undefined") unless defined $other->end;
290              
291 0   0     0 return ($self->_testStrand($other, $so) and
292             $self->start() == $other->start() and
293             $self->end() == $other->end() );
294             }
295              
296             =head1 Geometrical methods
297              
298             These methods do things to the geometry of ranges, and return
299             Bio::RangeI compliant objects or triplets (start, stop, strand) from
300             which new ranges could be built.
301              
302             =head2 intersection
303              
304             Title : intersection
305             Usage : ($start, $end, $strand) = $r1->intersection($r2); OR
306             ($start, $end, $strand) = Bio::Range->intersection(\@ranges); OR
307             my $containing_range = $r1->intersection($r2); OR
308             my $containing_range = Bio::Range->intersection(\@ranges);
309             Function: gives the range that is contained by all ranges
310             Returns : undef if they do not overlap or if @ranges has only a
311             single range, else returns the range that they do
312             overlap. In scalar contex, the return value is an object of
313             the same class as the calling one. In array context the
314             return value is a three element array.
315             Args : arg #1 = [REQUIRED] a Bio::RangeI to compare this one to,
316             or an array ref of ranges
317             arg #2 = optional strand-testing arg ('strong', 'weak', 'ignore')
318              
319             =cut
320              
321             sub intersection {
322 4438     4438 1 5113 my ($self, $given, $so) = @_;
323 4438 100       7242 $self->throw("missing arg: you need to pass in another feature") unless $given;
324              
325 4437         3344 my @ranges;
326 4437 50       9719 if ($self eq "Bio::RangeI") {
327 0         0 $self = "Bio::Range";
328 0         0 $self->warn("calling static methods of an interface is deprecated; use $self instead");
329             }
330 4437 100       7383 if (ref $self) {
331 4436         4482 push(@ranges, $self);
332             }
333 4437 100       6401 ref($given) eq 'ARRAY' ? push(@ranges, @{$given}) : push(@ranges, $given);
  3         6  
334             #$self->throw("Need at least 2 ranges") unless @ranges >= 2;
335             # Rather than the above, I think the following is more consistent
336 4437 50       7561 return undef unless @ranges >= 2;
337              
338 4437         3804 my $intersect;
339 4437         7066 while (@ranges > 0) {
340 4440 100       6830 unless ($intersect) {
341 4437         4618 $intersect = shift(@ranges);
342 4437 50       6769 $self->throw("Not an object: $intersect") unless ref($intersect);
343 4437 50       10685 $self->throw("Not a Bio::RangeI object: $intersect") unless $intersect->isa('Bio::RangeI');
344 4437 50       7665 $self->throw("start is undefined") unless defined $intersect->start;
345 4437 50       7775 $self->throw("end is undefined") unless defined $intersect->end;
346             }
347              
348 4440         4403 my $compare = shift(@ranges);
349 4440 50       7908 $self->throw("Not an object: $compare") unless ref($compare);
350 4440 50       12487 $self->throw("Not a Bio::RangeI object: $compare") unless $compare->isa('Bio::RangeI');
351 4440 50       7263 $self->throw("start is undefined") unless defined $compare->start;
352 4440 50       6286 $self->throw("end is undefined") unless defined $compare->end;
353 4440 50       7421 return unless $compare->_testStrand($intersect, $so);
354              
355 4440         7546 my @starts = sort {$a <=> $b} ($intersect->start(), $compare->start());
  4440         10454  
356 4440         7813 my @ends = sort {$a <=> $b} ($intersect->end(), $compare->end());
  4440         9901  
357              
358 4440         4961 my $start = pop @starts; # larger of the 2 starts
359 4440         4884 my $end = shift @ends; # smaller of the 2 ends
360              
361 4440         3695 my $intersect_strand; # strand for the intersection
362 4440 100 33     7418 if (defined($intersect->strand) && defined($compare->strand) && $intersect->strand == $compare->strand) {
      66        
363 4284         5497 $intersect_strand = $compare->strand;
364             }
365             else {
366 156         144 $intersect_strand = 0;
367             }
368              
369 4440 100       7879 if ($start > $end) {
370 109         255 return;
371             }
372             else {
373 4331         12550 $intersect = $self->new(-start => $start,
374             -end => $end,
375             -strand => $intersect_strand);
376             }
377             }
378              
379 4328 100       6125 if (wantarray()) {
380 4         8 return ($intersect->start, $intersect->end, $intersect->strand);
381             }
382             else {
383 4324         6455 return $intersect;
384             }
385             }
386              
387             =head2 union
388              
389             Title : union
390             Usage : ($start, $end, $strand) = $r1->union($r2);
391             : ($start, $end, $strand) = Bio::Range->union(@ranges);
392             my $newrange = Bio::Range->union(@ranges);
393             Function: finds the minimal Range that contains all of the Ranges
394             Args : a Range or list of Range objects
395              
396             Returns : the range containing all of the range. In scalar contex,
397             the return value is an object of the same class as the
398             calling one. In array context the return value is a
399             three element array.
400              
401             =cut
402              
403             sub union {
404 21337     21337 1 18305 my $self = shift;
405 21337         23050 my @ranges = @_;
406 21337 50       39037 if ($self eq "Bio::RangeI") {
407 0         0 $self = "Bio::Range";
408 0         0 $self->warn("calling static methods of an interface is deprecated; use $self instead");
409             }
410 21337 100       33384 if(ref $self) {
411 21336         25601 unshift @ranges, $self;
412             }
413              
414 21341         47529 my @start = sort {$a<=>$b}
415 21337         22995 map( { $_->start() } @ranges);
  42676         61059  
416 21341         41373 my @end = sort {$a<=>$b}
417 21337         23572 map( { $_->end() } @ranges);
  42676         56171  
418              
419 21337         22929 my $start = shift @start;
420 21337         38798 while( !defined $start ) {
421 0         0 $start = shift @start;
422             }
423              
424 21337         18113 my $end = pop @end;
425              
426 21337         16717 my $union_strand; # Strand for the union range object.
427              
428 21337         25372 foreach(@ranges) {
429 42676 100       49273 if(! defined $union_strand) {
430 21354         32670 $union_strand = $_->strand;
431 21354         26465 next;
432             } else {
433 21322 100 100     28195 if(not defined $_->strand or $union_strand ne $_->strand) {
434 467         348 $union_strand = 0;
435 467         466 last;
436             }
437             }
438             }
439 21337 50 33     38971 return unless $start or $end;
440 21337 100       25115 if( wantarray() ) {
441 17083         37123 return ( $start,$end,$union_strand);
442             } else {
443 4254         7958 return $self->new('-start' => $start,
444             '-end' => $end,
445             '-strand' => $union_strand
446             );
447             }
448             }
449              
450             =head2 overlap_extent
451              
452             Title : overlap_extent
453             Usage : ($a_unique,$common,$b_unique) = $a->overlap_extent($b)
454             Function: Provides actual amount of overlap between two different
455             ranges
456             Example :
457             Returns : array of values containing the length unique to the calling
458             range, the length common to both, and the length unique to
459             the argument range
460             Args : a range
461              
462             =cut
463              
464             sub overlap_extent{
465 1     1 1 702 my ($a,$b) = @_;
466              
467 1 0       7 $a->throw("start is undefined") unless defined $a->start;
468 0 0       0 $a->throw("end is undefined") unless defined $a->end;
469 0 0       0 $b->throw("Not a Bio::RangeI object") unless $b->isa('Bio::RangeI');
470 0 0       0 $b->throw("start is undefined") unless defined $b->start;
471 0 0       0 $b->throw("end is undefined") unless defined $b->end;
472              
473 0 0       0 if( ! $a->overlaps($b) ) {
474 0         0 return ($a->length,0,$b->length);
475             }
476              
477 0         0 my ($au,$bu) = (0, 0);
478 0 0       0 if( $a->start < $b->start ) {
479 0         0 $au = $b->start - $a->start;
480             } else {
481 0         0 $bu = $a->start - $b->start;
482             }
483              
484 0 0       0 if( $a->end > $b->end ) {
485 0         0 $au += $a->end - $b->end;
486             } else {
487 0         0 $bu += $b->end - $a->end;
488             }
489              
490 0         0 my $intersect = $a->intersection($b);
491 0 0       0 if( ! $intersect ) {
492 0         0 warn("no intersection\n");
493 0         0 return ($au, 0, $bu);
494             } else {
495 0         0 my $ie = $intersect->end;
496 0         0 my $is = $intersect->start;
497 0         0 return ($au,$ie-$is+1,$bu);
498             }
499             }
500              
501             =head2 disconnected_ranges
502              
503             Title : disconnected_ranges
504             Usage : my @disc_ranges = Bio::Range->disconnected_ranges(@ranges);
505             Function: finds the minimal set of ranges such that each input range
506             is fully contained by at least one output range, and none of
507             the output ranges overlap
508             Args : a list of ranges
509             Returns : a list of objects of the same type as the input
510             (conforms to RangeI)
511              
512             =cut
513              
514             sub disconnected_ranges {
515 2080     2080 1 2060 my $self = shift;
516 2080 50       3940 if ($self eq "Bio::RangeI") {
517 0         0 $self = "Bio::Range";
518 0         0 $self->warn("calling static methods of an interface is deprecated; use $self instead");
519             }
520 2080         2561 my @inranges = @_;
521 2080 50       3698 if(ref $self) {
522 0         0 unshift @inranges, $self;
523             }
524              
525 2080         2126 my @outranges = (); # disconnected ranges
526              
527             # iterate through all input ranges $inrange,
528             # adding each input range to the set of output ranges @outranges,
529             # provided $inrange does not overlap ANY range in @outranges
530             # - if it does overlap an outrange, then merge it
531 2080         2145 foreach my $inrange (@inranges) {
532 6333         4980 my $intersects = 0;
533 6333         5307 my @outranges_new = ();
534 6333         5105 my @intersecting_ranges = ();
535              
536             # iterate through all @outranges, testing if it intersects
537             # current $inrange; if it does, merge and add to list
538             # of @intersecting_ranges, otherwise add $outrange to
539             # the new list of outranges that do NOT intersect
540 6333         10313 for (my $i=0; $i<@outranges; $i++) {
541 4289         4447 my $outrange = $outranges[$i];
542 4289         7550 my $intersection = $inrange->intersection($outrange);
543 4289 100       6473 if ($intersection) {
544 4250         3878 $intersects = 1;
545 4250         6690 my $union = $inrange->union($outrange);
546 4250         12104 push(@intersecting_ranges, $union);
547             }
548             else {
549 39         96 push(@outranges_new, $outrange);
550             }
551             }
552 6333         8873 @outranges = @outranges_new;
553             # @outranges now contains a list of non-overlapping ranges
554             # that do not intersect the current $inrange
555              
556 6333 100       8827 if (@intersecting_ranges) {
557 4250 50       5509 if (@intersecting_ranges > 1) {
558             # this sf intersected > 1 range, which means that
559             # all the ranges it intersects should be joined
560             # together in a new range
561 0         0 my $merged_range =
562             $self->union(@intersecting_ranges);
563 0         0 push(@outranges, $merged_range);
564              
565             }
566             else {
567             # exactly 1 intersecting range
568 4250         7691 push(@outranges, @intersecting_ranges);
569             }
570             }
571             else {
572             # no intersections found - new range
573 2083         4873 push(@outranges,
574             $self->new('-start'=>$inrange->start,
575             '-end'=>$inrange->end,
576             '-strand'=>$inrange->strand,
577             ));
578             }
579             }
580 2080         5725 return @outranges;
581             }
582              
583             =head2 offsetStranded
584              
585             Title : offsetStranded
586             Usage : $rnge->ofsetStranded($fiveprime_offset, $threeprime_offset)
587             Function : destructively modifies RangeI implementing object to
588             offset its start and stop coordinates by values $fiveprime_offset and
589             $threeprime_offset (positive values being in the strand direction).
590             Args : two integer offsets: $fiveprime_offset and $threeprime_offset
591             Returns : $self, offset accordingly.
592              
593             =cut
594              
595             sub offsetStranded {
596 5     5 1 727 my ($self, $offset_fiveprime, $offset_threeprime) = @_;
597 5 100       14 my ($offset_start, $offset_end) = $self->strand() eq -1 ? (- $offset_threeprime, - $offset_fiveprime) : ($offset_fiveprime, $offset_threeprime);
598 4         9 $self->start($self->start + $offset_start);
599 4         6 $self->end($self->end + $offset_end);
600 4         9 return $self;
601             };
602              
603             =head2 subtract
604              
605             Title : subtract
606             Usage : my @subtracted = $r1->subtract($r2)
607             Function: Subtract range r2 from range r1
608             Args : arg #1 = a range to subtract from this one (mandatory)
609             arg #2 = strand option ('strong', 'weak', 'ignore') (optional)
610             Returns : undef if they do not overlap or r2 contains this RangeI,
611             or an arrayref of Range objects (this is an array since some
612             instances where the subtract range is enclosed within this range
613             will result in the creation of two new disjoint ranges)
614              
615             =cut
616              
617             sub subtract() {
618 7     7 1 1007 my ($self, $range, $so) = @_;
619 7 100       18 $self->throw("missing arg: you need to pass in another feature")
620             unless $range;
621 6 100       13 return unless $self->_testStrand($range, $so);
622              
623 4 50       7 if ($self eq "Bio::RangeI") {
624 0         0 $self = "Bio::Range";
625 0         0 $self->warn("calling static methods of an interface is
626             deprecated; use $self instead");
627             }
628 4 50       12 $range->throw("Input a Bio::RangeI object") unless
629             $range->isa('Bio::RangeI');
630              
631 4         4 my @sub_locations;
632 4 100       7 if ($self->location->isa('Bio::Location::SplitLocationI') ) {
633 1         2 @sub_locations = $self->location->sub_Location;
634             } else {
635 3         4 @sub_locations = $self;
636             }
637              
638 4         3 my @outranges;
639 4         6 foreach my $sl (@sub_locations) {
640 6 100       12 if (!$sl->overlaps($range)) {
641 1         3 push(@outranges,
642             $self->new('-start' =>$sl->start,
643             '-end' =>$sl->end,
644             '-strand'=>$sl->strand,
645             ));
646 1         3 next;
647             }
648            
649             ##Subtracts everything
650 5 100       11 if ($range->contains($sl)) {
651 1         2 next;
652             }
653            
654 4         12 my ($start, $end, $strand) = $sl->intersection($range, $so);
655             ##Subtract intersection from $self range
656            
657 4 100       9 if ($sl->start < $start) {
658 3         5 push(@outranges,
659             $self->new('-start' =>$sl->start,
660             '-end' =>$start - 1,
661             '-strand'=>$sl->strand,
662             ));
663             }
664 4 100       9 if ($sl->end > $end) {
665 2         4 push(@outranges,
666             $self->new('-start' =>$end + 1,
667             '-end' =>$sl->end,
668             '-strand'=>$sl->strand,
669             ));
670             }
671             }
672              
673 4 100       8 if (@outranges) {
674 3         6 return \@outranges;
675             }
676 1         2 return;
677             }
678              
679             1;