File Coverage

Bio/Align/PairwiseStatistics.pm
Criterion Covered Total %
statement 63 74 85.1
branch 26 38 68.4
condition 20 38 52.6
subroutine 8 8 100.0
pod 4 4 100.0
total 121 162 74.6


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Align::PairwiseStatistics
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Jason Stajich
7             #
8             # Copyright Jason Stajich
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Align::PairwiseStatistics - Base statistic object for Pairwise Alignments
17              
18             =head1 SYNOPSIS
19              
20             use strict;
21             my $stats = Bio::Align::PairwiseStatistics->new();
22              
23             # get alignment object of two sequences somehow
24             my $pwaln;
25             print $stats->number_of_comparable_bases($pwaln);
26             my $score = $stats->score_nuc($pwaln);
27              
28              
29             =head1 DESCRIPTION
30              
31             Calculate pairwise statistics.
32              
33             =head1 FEEDBACK
34              
35             =head2 Mailing Lists
36              
37             User feedback is an integral part of the evolution of this and other
38             Bioperl modules. Send your comments and suggestions preferably to
39             the Bioperl mailing list. Your participation is much appreciated.
40              
41             bioperl-l@bioperl.org - General discussion
42             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
43              
44             =head2 Support
45              
46             Please direct usage questions or support issues to the mailing list:
47              
48             I
49              
50             rather than to the module maintainer directly. Many experienced and
51             reponsive experts will be able look at the problem and quickly
52             address it. Please include a thorough description of the problem
53             with code and data examples if at all possible.
54              
55             =head2 Reporting Bugs
56              
57             Report bugs to the Bioperl bug tracking system to help us keep track
58             of the bugs and their resolution. Bug reports can be submitted via the
59             web:
60              
61             https://github.com/bioperl/bioperl-live/issues
62              
63             =head1 AUTHOR - Jason Stajich
64              
65             Email jason-at-bioperl-dot-org
66              
67             =head1 APPENDIX
68              
69             The rest of the documentation details each of the object methods.
70             Internal methods are usually preceded with a _
71              
72             =cut
73              
74              
75             # Let the code begin...
76              
77              
78             package Bio::Align::PairwiseStatistics;
79 4     4   13 use vars qw($GapChars);
  4         5  
  4         142  
80 4     4   14 use strict;
  4         5  
  4         78  
81              
82              
83 4     4   61 BEGIN { $GapChars = '(\.|\-)'; }
84              
85 4     4   11 use base qw(Bio::Root::Root Bio::Align::StatisticsI);
  4         6  
  4         1184  
86              
87             =head2 number_of_comparable_bases
88              
89             Title : number_of_comparable_bases
90             Usage : my $bases = $stat->number_of_comparable_bases($aln);
91             Function: Returns the count of the number of bases that can be
92             compared (L) in this alignment ( length - gaps)
93             Returns : integer
94             Args : L
95              
96              
97             =cut
98              
99             sub number_of_comparable_bases{
100 8     8 1 12 my ($self,$aln) = @_;
101 8 50 33     49 if ( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {
    50          
102 0         0 $self->throw("Must provide a Bio::Align::AlignI compliant object to ".
103             "Bio::Align::PairwiseStatistics");
104 0         0 return 0;
105             } elsif ( $aln->num_sequences != 2 ) {
106 0         0 $self->throw("Only pairwise calculations supported. Found ".
107             $aln->num_sequences." sequences in alignment\n");
108             }
109 8         15 my $L = $aln->length - $self->number_of_gaps($aln);
110 8         19 return $L;
111             }
112              
113             =head2 number_of_differences
114              
115             Title : number_of_differences
116             Usage : my $nd = $stat->number_of_distances($aln);
117             Function: Returns the number of differences between two sequences
118             Returns : integer
119             Args : L
120              
121              
122             =cut
123              
124             sub number_of_differences{
125 2     2 1 4 my ($self,$aln) = @_;
126 2 50 33     20 if( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {
    50          
127 0         0 $self->throw("Must provide a Bio::Align::AlignI compliant object to ".
128             "Bio::Align::PairwiseStatistics");
129             } elsif ( $aln->num_sequences != 2 ) {
130 0         0 $self->throw("Only pairwise calculations supported. Found ".
131             $aln->num_sequences." sequences in alignment\n");
132             }
133 2         2 my (@seqs);
134 2         6 foreach my $seq ( $aln->each_seq ) {
135 4         11 push @seqs, [ split(//,$seq->seq())];
136             }
137 2         3 my $firstseq = shift @seqs;
138             #my $secondseq = shift @seqs;
139 2         3 my $diffcount = 0;
140 2         6 for (my $i = 0;$i<$aln->length; $i++ ) {
141 383 100       929 next if ( $firstseq->[$i] =~ /^$GapChars$/ );
142 356         304 foreach my $seq ( @seqs ) {
143 356 100       707 next if ( $seq->[$i] =~ /^$GapChars$/ );
144 343 100       792 if( $firstseq->[$i] ne $seq->[$i] ) {
145 40         72 $diffcount++;
146             }
147             }
148             }
149 2         37 return $diffcount;
150             }
151              
152             =head2 number_of_gaps
153              
154             Title : number_of_gaps
155             Usage : my $nd = $stat->number_of_gaps($aln);
156             Function: Returns the number of gapped positions among sequences in alignment
157             Returns : integer
158             Args : L
159              
160              
161             =cut
162              
163             sub number_of_gaps{
164 10     10 1 10 my ($self,$aln) = @_;
165 10 50 33     59 if ( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {
    50          
166 0         0 $self->throw("Must provide a Bio::Align::AlignI compliant object to ".
167             "Bio::Align::PairwiseStatistics");
168             } elsif ( $aln->num_sequences != 2 ) {
169 0         0 $self->throw("Only pairwise calculations supported. Found ".
170             $aln->num_sequences." sequences in alignment\n");
171             }
172 10         26 my $gapline = $aln->gap_line;
173             # this will count the number of '-' characters
174 10         84 return $gapline =~ tr/-/-/;
175             }
176              
177              
178             =head2 score_nuc
179              
180             Title : score_nuc
181             Usage : my $score = $stat->score_nuc($aln);
182             or
183             my $score = $stat->score_nuc(
184             -aln =>$aln,
185             -match => 1,
186             -mismatch => -1,
187             -gap_open => -1,
188             -gap_ext => -1
189             );
190             Function: Calculate the score of an alignment of 2 nucleic acid sequences. The
191             scoring parameters can be specified. Otherwise the blastn default
192             parameters are used: match = 2, mismatch = -3, gap opening = -5, gap
193             extension = -2
194             Returns : alignment score (number)
195             Args : L
196             match score [optional]
197             mismatch score [optional]
198             gap opening score [optional]
199             gap extension score [optional]
200              
201             =cut
202              
203             sub score_nuc {
204 4     4 1 10 my ($self, @args) = @_;
205 4         27 my ( $aln, $match, $mismatch, $gap_open, $gap_ext) = $self->_rearrange( [qw(
206             ALN MATCH MISMATCH GAP_OPEN GAP_EXT)], @args );
207 4 50 33     43 if ( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {
    50          
208 0         0 $self->throw("Must provide a Bio::Align::AlignI compliant object to ".
209             "Bio::Align::PairwiseStatistics");
210             } elsif ( $aln->num_sequences != 2 ) {
211 0         0 $self->throw("Only pairwise calculations supported. Found ".
212             $aln->num_sequences." sequences in alignment\n");
213             }
214 4         12 my $seq1 = $aln->get_seq_by_pos(1);
215 4         11 my $seq2 = $aln->get_seq_by_pos(2);
216 4 50 33     21 if (! ( ($seq1->alphabet eq 'dna' || $seq1->alphabet eq 'rna') &&
      33        
      33        
217             ($seq2->alphabet eq 'dna' || $seq2->alphabet eq 'rna') )) {
218 0         0 $self->throw("Can only score nucleic acid alignments");
219             }
220 4   100     17 $match ||= 2; # Blastn scoring defaults
221 4   100     10 $mismatch ||= -3;
222 4   100     11 $gap_open ||= -5;
223 4   100     9 $gap_ext ||= -2;
224 4         4 my $score = 0;
225 4         5 my $prevres1 = '-';
226 4         5 my $prevres2 = '-';
227 4         9 for (my $pos = 1 ; $pos <= $aln->length ; $pos++) {
228 766         910 my $res1 = $seq1->subseq($pos, $pos);
229 766         898 my $res2 = $seq2->subseq($pos, $pos);
230 766 100 100     1875 if (!($res1 eq '-' || $res2 eq '-')) { # no gap
231 686 100       622 if ($res1 eq $res2) { # same residue
232 606         464 $score += $match;
233             } else { # other residue
234 80         58 $score += $mismatch;
235             }
236             } else { # open or ext gap?
237 80         52 my $open = 0;
238 80 50 66     168 if (!($res1 eq '-' && $res2 eq '-')) { # exactly one gap
239 80         49 my $prevres = $prevres1;
240 80 100       117 $prevres = $prevres2 if $res2 eq '-';
241 80 100       108 $open = 1 unless $prevres eq '-';
242             } else { # 2 gaps
243 0 0 0     0 $open = 1 unless $prevres1 eq '-' && $prevres2 eq '-';
244             }
245 80 100       77 if ($open) {
246 32         28 $score += $gap_open; # gap opening
247             } else {
248 48         32 $score += $gap_ext; # gap extension
249             }
250             }
251 766         509 $prevres1 = $res1;
252 766         1084 $prevres2 = $res2;
253             }
254 4         38 return $score;
255             }
256              
257             1;