File Coverage

blib/lib/GenOO/Data/File/SAM/MDZ.pm
Criterion Covered Total %
statement 33 33 100.0
branch 14 18 77.7
condition n/a
subroutine 4 4 100.0
pod 0 2 0.0
total 51 57 89.4


line stmt bran cond sub pod time code
1             # POD documentation - main docs before the code
2              
3             =head1 NAME
4              
5             GenOO::Data::File::SAM::MDZ - Role - The MD:Z tag in a SAM file
6              
7             =head1 SYNOPSIS
8              
9             This role when consumed requires specific attributes and provides
10             methods to extract information from the MD:Z tag.
11              
12             =head1 DESCRIPTION
13              
14             The cigar string does not usually contain information regarding the deletions.
15             For this the MD:Z tag is usually provided.
16            
17             MD:Z -> String for mismatching positions. Regex: [0-9]+(([A-Z]|\^[A-Z]+)[0-9]+)
18            
19             The MD field aims to achieve SNP/indel calling without looking at the reference.
20             For example, a string `10A5^AC6' means from the leftmost reference base in the
21             alignment, there are 10 matches followed by an A on the reference which is
22             different from the aligned read base; the next 5 reference bases are matches
23             followed by a 2bp deletion from the reference; the deleted sequence is AC;
24             the last 6 bases are matches. The MD field ought to match the CIGAR string.
25              
26             =head1 EXAMPLES
27              
28             # Get the location information on the reference sequence
29             $obj_with_role->mismatch_positions_on_reference_calculated_from_mdz; # (534515, 534529, ...)
30              
31             =cut
32              
33             # Let the code begin...
34              
35             package GenOO::Data::File::SAM::MDZ;
36             $GenOO::Data::File::SAM::MDZ::VERSION = '1.5.1';
37              
38             #######################################################################
39             ####################### Load External modules #####################
40             #######################################################################
41 2     2   1406 use Moose::Role;
  2         4  
  2         16  
42 2     2   11491 use namespace::autoclean;
  2         5  
  2         18  
43              
44              
45             #######################################################################
46             ######################## Required attributes ######################
47             #######################################################################
48             requires qw(mdz start);
49              
50              
51             #######################################################################
52             ######################## Interface Methods ########################
53             #######################################################################
54             sub mismatch_positions_on_reference_calculated_from_mdz {
55 18     18 0 22 my ($self) = @_;
56             #Tag: AGTGATGGGA------GGATGTCTCGTCTGTGAGTTACAGCA -> CIGAR: 2M1I7M6D26M
57             # - -
58             #Genome: AG-GCTGGTAGCTCAGGGATGTCTCGTCTGTGAGTTACAGCA -> MD:Z: 3C3T1^GCTCAG26
59              
60 18         23 my @mismatch_positions;
61 18         50 my $mdz = $self->mdz;
62            
63 18 50       57 if ($mdz =~ /\w/) {
64 18         19 my $relative_position = 0;
65 18         36 while ($mdz ne '') {
66 144 100       537 if ($mdz =~ s/^(\d+)//) {
    100          
    50          
67 81         214 $relative_position += $1;
68             }
69             elsif ($mdz =~ s/^\^([A-Z]+)//) {
70 18         27 my $deletion_length = CORE::length($1);
71 18         34 $relative_position += $deletion_length;
72             }
73             elsif ($mdz =~ s/^\w//) {
74 45         1449 push @mismatch_positions, $self->start + $relative_position;
75 45         91 $relative_position += 1;
76             }
77             }
78             }
79            
80 18         94 return @mismatch_positions;
81             }
82              
83             sub reference_nt_for_mismatch_positions {
84 14     14 0 1164 my ($self) = @_;
85             #Tag: AGTGATGGGA------GGATGTCTCGTCTGTGAGTTACAGCA -> CIGAR: 2M1I7M6D26M
86             # - -
87             #Genome: AG-GCTGGTAGCTCAGGGATGTCTCGTCTGTGAGTTACAGCA -> MD:Z: 3C3T1^GCTCAG26
88              
89             #Returns mismatch nt from reference sequence. The mismatch nt is the real nt based on matching orientation. i.e. if the strand is -1 it will return the complement of what MD:Z says
90            
91 14         14 my @mismatch_nt;
92 14         32 my $mdz = $self->mdz;
93            
94 14 50       87 if ($mdz =~ /\w/) {
95 14         12 my $relative_position = 0;
96 14         31 while ($mdz ne '') {
97 122 100       390 if ($mdz =~ s/^(\d+)//) {
    100          
    50          
98 68         116 next;
99             }
100             elsif ($mdz =~ s/^\^([A-Z]+)//) {
101 18         27 next;
102             }
103             elsif ($mdz =~ s/^(\w)//) {
104 36         50 my $nt_on_reference = $1;
105 36 100       1066 if ($self->strand == -1) {
106 4         6 $nt_on_reference =~ tr/ATGCatgc/TACGtacg/;
107             }
108 36         90 push @mismatch_nt, $nt_on_reference;
109             }
110             }
111             }
112            
113 14         85 return @mismatch_nt;
114             }
115              
116             1;