File Coverage

blib/lib/Bio/ViennaNGS/BamStatSummary.pm
Criterion Covered Total %
statement 7 9 77.7
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 10 12 83.3


line stmt bran cond sub pod time code
1             # -*-CPerl-*-
2             # Last changed Time-stamp: <2015-01-05 15:49:29 mtw>
3              
4             package Bio::ViennaNGS::BamStatSummary;
5              
6 1     1   971 use 5.12.0;
  1         8  
  1         34  
7 1     1   3 use version; our $VERSION = qv('0.12_08');
  1         1  
  1         4  
8 1     1   271 use Moose;
  0            
  0            
9             use Carp;
10             use POSIX qw(floor);
11             use Statistics::R;
12             use Data::Dumper;
13             use Path::Class;
14             use namespace::autoclean;
15             use Bio::ViennaNGS::BamStat;
16             use Tie::Hash::Indexed;
17             use File::Basename;
18              
19             has 'data' => (
20             is => 'ro',
21             isa => 'ArrayRef [Bio::ViennaNGS::BamStat]',
22             default => sub { [] },
23             );
24              
25             has 'countStat' => (
26             is => 'rw',
27             isa => 'HashRef',
28             predicate => 'has_countStat',
29             default => sub { {} },
30             # auto_deref => '1',
31             );
32              
33             has 'outpath' => (
34             is => 'rw',
35             isa => 'Str',
36             required => '1',
37             );
38              
39             has 'rlib' => (
40             is => 'rw',
41             isa => 'Str',
42             required => '1',
43             );
44              
45             has 'files' => (
46             is => 'rw',
47             isa => 'ArrayRef',
48             required => 1,
49             predicate => 'has_files',
50             );
51              
52             has 'control_match' => ( # provides stats how many mapped bases match the reference genome
53             is => 'rw',
54             isa => 'Bool',
55             default => '1',
56             predicate => 'has_control_match',
57             );
58              
59             has 'control_clip' => ( # provides stats how many bases are soft or hard clipped
60             is => 'rw',
61             isa => 'Bool',
62             default => '1',
63             predicate => 'has_control_clip',
64             );
65              
66             has 'control_split' => ( # provides stats how many/often mapped reads are split
67             is => 'rw',
68             isa => 'Bool',
69             default => '1',
70             predicate => 'has_control_split',
71             );
72              
73             has 'control_qual' => ( # provides stats on quality of the match
74             is => 'rw',
75             isa => 'Bool',
76             default => '1',
77             predicate => 'has_control_qual',
78             );
79              
80             has 'control_edit' => ( # provides stats on the edit distance between read and mapped reference
81             is => 'rw',
82             isa => 'Bool',
83             default => '1',
84             predicate => 'has_control_edit',
85             );
86              
87             has 'control_flag' => ( # analyses the sam bit flag for qual/strands/pair_vs_single reads
88             is => 'rw',
89             isa => 'Bool',
90             default => '1',
91             predicate => 'has_control_flag',
92             );
93              
94             has 'control_score' => ( # provides stats on per-base quality scores
95             is => 'rw',
96             isa => 'Bool',
97             default => '1',
98             predicate => 'has_control_score',
99             );
100              
101             has 'control_uniq' => ( # gives number and stats of multiplicity of readaligments
102             is => 'rw',
103             isa => 'Bool',
104             default => '1',
105             predicate => 'has_control_uniq',
106             );
107              
108             has 'is_segemehl' => ( # toggles to consider segemehl specific bam feature
109             is => 'rw',
110             isa => 'Bool',
111             default => '0',
112             predicate => 'has_is_segemehl',
113             );
114              
115             sub populate_data {
116             my ($self) = @_;
117             foreach my $bamfile (@{$self->files}){
118             carp ">> processing $bamfile\n";
119             my $bo = Bio::ViennaNGS::BamStat->new(bam => $bamfile);
120             $bo->stat_singleBam();
121             push (@{$self->data}, $bo);
122             }
123             }
124              
125              
126             sub populate_countStat {
127             my ($self) = @_;
128              
129             tie my %hdr, 'Tie::Hash::Indexed';
130             %hdr = (
131             "sample" => "Sample",
132             "total_alignments" => "# Total alignments",
133             "mapped_reads" => "# Mapped reads",
134             "umapped_reads" => "# Unique mapped reads",
135             "mmapped_reads" => "# Multi mapped reads",
136             "aligned_pairs" => "# Aligned in pairs",
137             "aligned_mm" => "# Aligned mate missing",
138             "aligned_se" => "# Aligned single end",
139             "aligned_fwd" => "# Aligned forward strand",
140             "aligned_rev" => "# Aligned reverse strand");
141             ${$self->countStat}{'header'} = \%hdr;
142              
143             foreach my $sample (@{$self->data}){
144             my ($basename,$dir,$ext) = fileparse($$sample{'bam'},qr/\.[^.]*/);
145             ${$self->countStat}{$basename}{'total_alignments'} = floor(0.5 + $$sample{'data_out'}->{'aln_count'}->{'total'} );
146             ${$self->countStat}{$basename}{'mapped_reads'} = floor(0.5 + $$sample{'data_out'}->{'uniq'}->{'mapped_reads'} );
147             ${$self->countStat}{$basename}{'umapped_reads'} = floor(0.5 + $$sample{'data_out'}->{'uniq'}->{'uniq_mapped_reads'} );
148             ${$self->countStat}{$basename}{'mmapped_reads'} =
149             floor(0.5 + $$sample{'data_out'}->{'uniq'}->{'mapped_reads'} - $$sample{'data_out'}->{'uniq'}->{'uniq_mapped_reads'} );
150             ${$self->countStat}{$basename}{'aligned_pairs'} = floor(0.5 + ($$sample{'data_out'}->{'aln_count'}->{'mapped_pair'})/2 );
151             ${$self->countStat}{$basename}{'aligned_mm'} = floor(0.5 + $$sample{'data_out'}->{'aln_count'}->{'unmapped_pair'} );
152             ${$self->countStat}{$basename}{'aligned_se'} = floor(0.5 + $$sample{'data_out'}->{'aln_count'}->{'mapped_single'} );
153             ${$self->countStat}{$basename}{'aligned_fwd'} = floor(0.5 + $$sample{'data_out'}->{'strand'}->{'forward'} );
154             ${$self->countStat}{$basename}{'aligned_rev'} = floor(0.5 + $$sample{'data_out'}->{'strand'}->{'reverse'} );
155             }
156             }
157              
158             sub dump_countStat {
159             my ($self,$how) = @_;
160             my $mn = "mapping_stats.csv";
161             my $fn = file($self->outpath,$mn);
162             open(OUT, "> $fn") or croak "cannot open OUT $!";
163              
164             print OUT join ("\t", values %{$self->countStat->{'header'} })."\n";
165             foreach my $sample (keys %{$self->countStat} ){
166             next if ($sample eq 'header');
167             my @line = ();
168             foreach my $key ( keys %{$self->countStat->{'header'}} ) {
169             if ($key eq 'sample'){
170             push (@line, $sample);
171             next;
172             }
173             push @line, $self->countStat->{$sample}->{$key};
174             }
175             print OUT join ("\t", @line)."\n";
176             }
177              
178             # print Dumper($self->countStat);
179             close (OUT);
180             }
181              
182             sub make_BarPlot{
183             my ($self) = @_;
184             my @Rstat_data_count = ();
185              
186             ## collect data for read.table string
187             push @Rstat_data_count, 'Samples', grep {!/header/} keys %{$self->countStat}; # first line with sample names
188             $Rstat_data_count[-1]="$Rstat_data_count[-1]\n"; # end 1st line
189              
190             push @Rstat_data_count, 'aligned_se';
191             foreach my $sample ( grep {!/header/} keys %{$self->countStat} ) { # 2nd line with single end aln counts
192             push @Rstat_data_count, $self->countStat->{$sample}{'aligned_se'};
193             }
194             $Rstat_data_count[-1]="$Rstat_data_count[-1]\n"; # end 2nd line
195              
196             push @Rstat_data_count, 'aligned_pairs';
197             foreach my $sample ( grep {!/header/} keys %{$self->countStat} ) { # 3rd line with aligned pairs count
198             push @Rstat_data_count, $self->countStat->{$sample}{'aligned_pairs'};
199             }
200             $Rstat_data_count[-1]="$Rstat_data_count[-1]\n"; # end 3rd line
201              
202             push @Rstat_data_count, 'aligned_mm';
203             foreach my $sample ( grep {!/header/} keys %{$self->countStat} ) { # 4th line with incomplete aligned pairs
204             push @Rstat_data_count, $self->countStat->{$sample}{'aligned_mm'};
205             }
206             $Rstat_data_count[-1]="$Rstat_data_count[-1]\n"; # end 4th line
207              
208             ## produce bar plot
209             my $mn = "mapping_stats.pdf";
210             my $fn = file($self->outpath,$mn);
211             my $datastring = join(" ", @Rstat_data_count);
212             $self->plot_barplot($fn, "Mapped reads", $datastring); # produce plot with read.table string input
213             }
214              
215             sub plot_barplot { #plot barplot read.table text string
216             my ($self, $filename,$ylab,$data_string) = @_;
217             my ($bn,$odir,$ext) = fileparse($filename, qr /\..*/);
218             #my $rlibpath = '/usr/bin/R';
219             my $rlibpath = $self->rlib;
220              
221             $filename .= '.pdf' unless ($ext eq 'pdf');
222              
223             my $R = Statistics::R->new();
224             $R->startR;
225             $R->set('rlib', $rlibpath);
226             $R->set('log_dir', $odir);
227             $R->run("pdf('${filename}')") ;
228             $R->run("dat<-read.table(text = \"$data_string\", header = TRUE, row.names=1)") ;
229             $R->run("dat_m<-as.matrix(dat)") ;
230             $R->run("colors<-terrain.colors(nrow(dat_m), alpha = 1)") ;
231             # $R->run("colors<-c('darkolivegreen4','darkolivegreen2','coral1')") ;
232             # $R->run("colors<-hcl(seq(0, 360, length =nrow(dat_m)))") ;
233             $R->run("barplot(dat_m, xlim=c(0,ncol(dat_m)+2), col=colors, legend.text = TRUE, args.legend = list(x = ncol(dat_m) + 2, y=max(colSums(dat_m)), bty = 'n' ), ylab='$ylab', xlab='Samples')") ;
234             $R->run("dev.off()") ;
235             $R->stopR;
236             }
237              
238             1;