File Coverage

blib/lib/GenOO/Data/File/SAM/CigarAndMDZ.pm
Criterion Covered Total %
statement 63 63 100.0
branch 14 14 100.0
condition 18 24 75.0
subroutine 5 5 100.0
pod 0 2 0.0
total 100 108 92.5


line stmt bran cond sub pod time code
1             # POD documentation - main docs before the code
2              
3             =head1 NAME
4              
5             GenOO::Data::File::SAM::CigarAndMDZ - Role that combines SAM CIGAR string with MD:Z tag
6              
7             =head1 SYNOPSIS
8              
9             This role when consumed requires specific attributes and provides
10             methods to extract information from the CIGAR string in combination
11             with the MD:Z tag.
12              
13             =head1 DESCRIPTION
14              
15             The cigar string does not usually contain information regarding the deletions.
16             For this the MD:Z tag is usually provided. Combining the CIGAR information with
17             the MD:Z tag we can extract information such as for example the deletion positions
18             on the query sequence.
19              
20             =head1 EXAMPLES
21              
22             # Get the location information on the reference sequence
23             $obj_with_role->mismatch_positions_on_query_calculated_from_mdz; # (10, 19)
24              
25             =cut
26              
27             # Let the code begin...
28              
29             package GenOO::Data::File::SAM::CigarAndMDZ;
30             $GenOO::Data::File::SAM::CigarAndMDZ::VERSION = '1.5.1';
31              
32             #######################################################################
33             ####################### Load External modules #####################
34             #######################################################################
35 2     2   1248 use Moose::Role;
  2         3  
  2         12  
36 2     2   6845 use namespace::autoclean;
  2         3  
  2         17  
37              
38              
39             #######################################################################
40             ######################## Required attributes ######################
41             #######################################################################
42             requires qw(query_length);
43              
44              
45             #######################################################################
46             ########################## Consumed Roles #########################
47             #######################################################################
48             with 'GenOO::Data::File::SAM::Cigar', 'GenOO::Data::File::SAM::MDZ';
49              
50              
51             #######################################################################
52             ######################## Interface Methods ########################
53             #######################################################################
54             sub mismatch_positions_on_query_calculated_from_mdz {
55 6     6 0 6 my ($self) = @_;
56             #Tag: AGTGATGGGA------GGATGTCTCGTCTGTGAGTTACAGCA -> CIGAR: 2M1I7M6D26M
57             # - -
58             #Genome: AG-GCTGGTAGCTCAGGGATGTCTCGTCTGTGAGTTACAGCA -> MD:Z: 3C3T1^GCTCAG26
59            
60 6         12 my $cigar = $self->cigar;
61            
62             # Find positions of insertions and deletions (dashes) on the query sequence
63 6         9 my @deletion_positions;
64             my @insertion_positions;
65 6         7 my $position = 0;
66 6         30 while ($cigar =~ /(\d+)([MIDNSHP=X])/g) {
67 28         44 my $count = $1;
68 28         31 my $identifier = $2;
69            
70 28 100 100     223 if ($identifier eq 'D' or $identifier eq 'N' or $identifier eq 'P' or $identifier eq 'H') {
    100 66        
      66        
      66        
71 6         26 push (@deletion_positions, $position + $_) for (0..$count-1)
72             }
73             elsif ($identifier eq 'I' or $identifier eq 'S') {
74 5         21 push (@insertion_positions, $position + $_) for (0..$count-1)
75             }
76 28         100 $position += $count;
77             }
78            
79 6         20 my @mismatch_positions_on_reference = $self->mismatch_positions_on_reference;
80 6         11 my @relative_mismatch_positions_on_reference = map {$_ - $self->start} @mismatch_positions_on_reference;
  15         586  
81            
82 6         8 my @mismatch_positions;
83 6         9 foreach my $relative_mismatch_position_on_reference (@relative_mismatch_positions_on_reference) {
84 15         34 my $deletion_adjustment = _how_many_are_smaller($relative_mismatch_position_on_reference, \@deletion_positions);
85 15         27 my $insertion_adjustment = _how_many_are_smaller($relative_mismatch_position_on_reference, \@insertion_positions);
86            
87 15         20 my $mismatch_position_on_query = $relative_mismatch_position_on_reference - $deletion_adjustment + $insertion_adjustment;
88            
89 15 100       582 if ($self->strand == -1) {
90 2         8 $mismatch_position_on_query = ($self->query_length - 1) - $mismatch_position_on_query;
91             }
92            
93 15         34 push @mismatch_positions, $mismatch_position_on_query;
94             }
95            
96 6         53 return @mismatch_positions;
97             }
98              
99             sub mismatch_nt_on_query {
100 7     7 0 944 my ($self) = @_;
101             #Tag: AGTGATGGGA------GGATGTCTCGTCTGTGAGTTACAGCA -> CIGAR: 2M1I7M6D26M
102             # - -
103             #Genome: AG-GCTGGTAGCTCAGGGATGTCTCGTCTGTGAGTTACAGCA -> MD:Z: 3C3T1^GCTCAG26
104            
105             #returns an array containing a hash with keys {query_nt} {reference_nt} {query_pos}
106            
107 7         17 my $cigar = $self->cigar;
108            
109             # Find positions of insertions and deletions (dashes) on the query sequence
110 7         7 my @deletion_positions;
111             my @insertion_positions;
112 7         8 my $position = 0;
113 7         24 while ($cigar =~ /(\d+)([MIDNSHP=X])/g) {
114 46         48 my $count = $1;
115 46         32 my $identifier = $2;
116            
117 46 100 100     290 if ($identifier eq 'D' or $identifier eq 'N' or $identifier eq 'P' or $identifier eq 'H') {
    100 66        
      66        
      66        
118 9         21 push (@deletion_positions, $position + $_) for (0..$count-1)
119             }
120             elsif ($identifier eq 'I' or $identifier eq 'S') {
121 8         20 push (@insertion_positions, $position + $_) for (0..$count-1)
122             }
123 46         111 $position += $count;
124             }
125            
126 7         21 my @mismatch_positions_on_reference = $self->mismatch_positions_on_reference;
127 7         11 my @relative_mismatch_positions_on_reference = map {$_ - $self->start} @mismatch_positions_on_reference;
  18         366  
128 7         15 my @nts_on_reference = $self->reference_nt_for_mismatch_positions;
129            
130 7         8 my @outarray;
131 7         14 my $query_seq = $self->query_seq;
132 7         17 for (my $i = 0; $i < @relative_mismatch_positions_on_reference; $i++){
133 18         15 my $relative_mismatch_position_on_reference = $relative_mismatch_positions_on_reference[$i];
134 18         23 my $deletion_adjustment = _how_many_are_smaller($relative_mismatch_position_on_reference, \@deletion_positions);
135 18         21 my $insertion_adjustment = _how_many_are_smaller($relative_mismatch_position_on_reference, \@insertion_positions);
136            
137 18         14 my $mismatch_position_on_query = $relative_mismatch_position_on_reference - $deletion_adjustment + $insertion_adjustment;
138            
139 18 100       380 if ($self->strand == -1) {
140 2         4 $mismatch_position_on_query = ($self->query_length - 1) - $mismatch_position_on_query;
141             }
142            
143 18         20 my $nt_on_query = substr($query_seq, $mismatch_position_on_query, 1);
144            
145 18         12 my $nt_on_reference = $nts_on_reference[$i];
146            
147 18         61 push @outarray, {'query_nt' => $nt_on_query, 'reference_nt' => $nt_on_reference, 'query_pos' => $mismatch_position_on_query};
148              
149             }
150            
151             # print $outarray[0]->{'query_pos'}."\n";
152 7         63 return @outarray;
153             }
154              
155              
156             #######################################################################
157             ####################### Private Subroutines ########################
158             #######################################################################
159             sub _how_many_are_smaller {
160 66     66   63 my ($value, $array) = @_;
161            
162 66         44 my $count = 0;
163 66         69 foreach my $array_value (@$array) {
164 131 100       166 if ($array_value < $value) {
165 65         50 $count++;
166             }
167             }
168 66         59 return $count;
169             }
170              
171             1;