File Coverage

blib/lib/diffReps/ChromaModSite.pm
Criterion Covered Total %
statement 39 506 7.7
branch 0 92 0.0
condition 0 51 0.0
subroutine 13 51 25.4
pod 0 9 0.0
total 52 709 7.3


line stmt bran cond sub pod time code
1 1     1   1008 use 5.006;
  1         3  
  1         126  
2             our $VERSION = '1.122';
3              
4             # Class for a chromatin modification site consisting consecutive significant windows.
5             package diffReps::ChromaModSite;
6 1     1   6 use strict;
  1         1  
  1         285  
7 1     1   6 use MyBioinfo::Common;
  1         2  
  1         126  
8 1     1   6 use MyShortRead::SRBed;
  1         1  
  1         2252  
9              
10             our @ISA = qw(diffReps::SlideWindow); # Inherited from 'SlideWindow'.
11              
12             # Constructor.
13             sub new{
14 0     0 0   my $class = shift;
15 0           my $self = {
16             chrom => undef,
17             start => undef,
18             end => undef,
19             siteCenter => undef,
20             retrieved => 0, # boolean tag: whether count data have been retrieved?
21             tr_cnt => [], # region normalized treatment counts.
22             co_cnt => [], # region normalized control counts.
23             tr_cnt_ => [], # normalized treatment counts using #reads.
24             co_cnt_ => [], # normalized control counts using #reads.
25             btr_cnt => [], # normalized background treatment counts.
26             bco_cnt => [], # normalized background control counts.
27             #trEnr => undef, # treatment enrichment vs. background.
28             #coEnr => undef, # control enrichment vs. background.
29             rsumTr => undef, # sum of raw treatment counts.
30             rsumCo => undef, # sum of raw control counts.
31             dirn => undef, # change direction: 'Up' or 'Down'.
32             logFC => undef, # log2 fold change.
33             pval => undef, # P-value.
34             padj => undef, # adjusted P-value.
35             winSta => undef, # best window start.
36             #winTr => [], # window normalized treatment counts.
37             #winCo => [], # window normalized control counts.
38             winFC => undef, # window log2 fold change.
39             winP => undef, # window P-value.
40             winQ => undef # window adjusted P-value.
41             };
42 0           bless $self, $class;
43 0           return $self;
44             }
45              
46             # Copy member data into an anonymous hash table.
47             sub dcopy{
48 0     0 0   my $self = shift;
49 0           my $new = {};
50 0           %{$new} = %{$self};
  0            
  0            
51             # Deep copy array members that are array variables.
52 0           $new->{tr_cnt} = []; @{$new->{tr_cnt}} = @{$self->{tr_cnt}};
  0            
  0            
  0            
53 0           $new->{co_cnt} = []; @{$new->{co_cnt}} = @{$self->{co_cnt}};
  0            
  0            
  0            
54 0           $new->{tr_cnt_} = []; @{$new->{tr_cnt_}} = @{$self->{tr_cnt_}};
  0            
  0            
  0            
55 0           $new->{co_cnt_} = []; @{$new->{co_cnt_}} = @{$self->{co_cnt_}};
  0            
  0            
  0            
56 0           $new->{btr_cnt} = []; @{$new->{btr_cnt}} = @{$self->{btr_cnt}};
  0            
  0            
  0            
57 0           $new->{bco_cnt} = []; @{$new->{bco_cnt}} = @{$self->{bco_cnt}};
  0            
  0            
  0            
58             #$new->{winTr} = []; @{$new->{winTr}} = @{$self->{winTr}};
59             #$new->{winCo} = []; @{$new->{winCo}} = @{$self->{winCo}};
60 0           bless $new, 'diffReps::ChromaModSite';
61 0           return $new;
62             }
63              
64             # Initialize a region by a significant window.
65             sub init{
66 0     0 0   my($self,$gdt,$rwin) = @_;
67 0           $self->{chrom} = $rwin->{chrom};
68 0           $self->{start} = $rwin->{winN}*$gdt->{step} + 1;
69 0           $self->{end} = $rwin->{winN}*$gdt->{step} + $gdt->{winSize};
70 0           $self->{siteCenter} = ($self->{start} + $self->{end}) /2;
71 0           $self->{retrieved} = 1;
72 0           @{$self->{tr_cnt}} = @{$rwin->{tr_cnt}};
  0            
  0            
73 0           @{$self->{co_cnt}} = @{$rwin->{co_cnt}};
  0            
  0            
74 0           @{$self->{tr_cnt_}} = @{$rwin->{tr_cnt_}};
  0            
  0            
75 0           @{$self->{co_cnt_}} = @{$rwin->{co_cnt_}};
  0            
  0            
76 0           @{$self->{btr_cnt}} = @{$rwin->{btr_cnt}};
  0            
  0            
77 0           @{$self->{bco_cnt}} = @{$rwin->{bco_cnt}};
  0            
  0            
78 0           $self->{rsumTr} = $rwin->{rsumTr};
79 0           $self->{rsumCo} = $rwin->{rsumCo};
80 0           $self->{dirn} = $rwin->{dirn};
81 0           $self->{logFC} = $rwin->{logFC};
82 0           $self->{pval} = $rwin->{pval};
83 0           $self->{winSta} = $self->{start};
84             #@{$self->{winTr}} = @{$rwin->{tr_cnt}};
85             #@{$self->{winCo}} = @{$rwin->{co_cnt}};
86 0           $self->{winFC} = $rwin->{logFC};
87 0           $self->{winP} = $rwin->{pval};
88             }
89              
90             # Expand the genomic coordinates. Replace best window if necessary.
91             # Do not retrieve counts and sums to save some computation.
92             sub expand{
93 0     0 0   my($self,$gdt,$rwin) = @_;
94 0           $self->{end} = $rwin->{winN}*$gdt->{step} + $gdt->{winSize}; # new region end.
95 0           $self->{siteCenter} = ($self->{start} + $self->{end}) /2;
96             # Invalidate previous diff results.
97 0           $self->reset_content(1); # reset everything but keep direction.
98             # $self->{tr_cnt} = [];
99             # $self->{co_cnt} = [];
100             # $self->{rsumTr} = undef;
101             # $self->{rsumCo} = undef;
102             # $self->{logFC} = undef;
103             # Replace best window if necessary.
104 0 0         if($rwin->{pval} < $self->{winP}){
105 0           $self->{winSta} = $rwin->{winN}*$gdt->{step} + 1;
106             #@{$self->{winTr}} = @{$rwin->{tr_cnt}};
107             #@{$self->{winCo}} = @{$rwin->{co_cnt}};
108 0           $self->{winFC} = $rwin->{logFC};
109 0           $self->{winP} = $rwin->{pval};
110             }
111             }
112              
113             # Print a formatted record of the current region. Do nothing if the P-val is not defined.
114             sub print_reg{
115 0     0 0   my($self,$gdt,$h) = @_;
116 0 0         if(defined $self->{pval}){
117             # Concatenate treatment and control counts for output.
118 0           my @tr_cnt_fmt = fprecision(2, @{$self->{tr_cnt}});
  0            
119 0           my @co_cnt_fmt = fprecision(2, @{$self->{co_cnt}});
  0            
120 0           my $tr_cnt_str = join(';', @tr_cnt_fmt);
121 0           my $co_cnt_str = join(';', @co_cnt_fmt);
122 0           my($tr_bkg_enr, $co_bkg_enr);
123 0 0         if($gdt->{bkgEnr}){
124 0           my $btr_m = mean(@{$self->{btr_cnt}});
  0            
125 0           my $bco_m = mean(@{$self->{bco_cnt}});
  0            
126 0 0         $tr_bkg_enr = $btr_m > 0? fprecision(2, mean(@{$self->{tr_cnt_}}) / $btr_m) : INFINITE;
  0            
127 0 0         $co_bkg_enr = $bco_m > 0? fprecision(2, mean(@{$self->{co_cnt_}}) / $bco_m) : INFINITE;
  0            
128             }else{
129 0           $tr_bkg_enr = 'NA';
130 0           $co_bkg_enr = 'NA';
131             }
132             # Output: chrom,start,end,length,(treatment read count),(control read count),direction,logFC,pval,padj,
133             # (window start),(window end),(window logFC),(window pval),(window padj).
134 0           print $h join("\t", ($self->{chrom},
135             $self->{start}, $self->{end}, $self->{end}-$self->{start}+1, # region coordinates.
136             $tr_cnt_str, $co_cnt_str, # formatted read counts.
137 0           fprecision(2, &mean(@{$self->{tr_cnt}})), fprecision(2, &mean(@{$self->{co_cnt}})), # avg read count.
  0            
138             $tr_bkg_enr, $co_bkg_enr, # enrichment vs. background.
139             $self->{dirn}, fprecision(2,$self->{logFC}), # change direction and logFC.
140             $self->{pval}, $self->{padj}, # diff analysis info.
141             $self->{winSta}, $self->{winSta}+$gdt->{winSize}-1, # best window coordinates.
142             fprecision(2,$self->{winFC}),
143             $self->{winP}, $self->{winQ})), "\n"; # best window diff info.
144             }
145             }
146              
147             # Do we need to perform stat test for the region?
148             # If the region contains only one window, this is already done.
149             sub needStat{
150 0     0 0   my $self = shift;
151 0   0       return (defined($self->{winP}) && !defined($self->{pval}));
152             }
153              
154             # Retrieve normalized read counts for the region. Override parent function.
155             sub retrieve_norm_cnt{
156 0     0 0   my($self,$gdt) = @_;
157 0           my @atr_cnt; # array treatment counts.
158 0           my $i = 0; # iterator for normalization constants.
159 0           foreach my $b(@{$gdt->{bedTr}}) {
  0            
160 0           push @atr_cnt, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end})
161             / $gdt->{normTr}[$i++]);
162             }
163 0           my @aco_cnt; # array control counts.
164 0           $i = 0;
165 0           foreach my $b(@{$gdt->{bedCo}}) {
  0            
166 0           push @aco_cnt, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end})
167             / $gdt->{normCo}[$i++]);
168             }
169 0           @{$self->{tr_cnt}} = @atr_cnt;
  0            
170 0           @{$self->{co_cnt}} = @aco_cnt;
  0            
171             # Retrieve norm count for background if needed.
172 0 0         if($gdt->{bkgEnr}){
173 0           my @atr_cnt_; # array treatment counts.
174 0           $i = 0; # iterator for normalization constants.
175 0           foreach my $b(@{$gdt->{bedTr}}) {
  0            
176 0           push @atr_cnt_, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end}) / $gdt->{normTr_}->[$i++]);
177             }
178 0           @{$self->{tr_cnt_}} = @atr_cnt_;
  0            
179 0           my @aco_cnt_; # array treatment counts.
180 0           $i = 0; # iterator for normalization constants.
181 0           foreach my $b(@{$gdt->{bedCo}}) {
  0            
182 0           push @aco_cnt_, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end}) / $gdt->{normCo_}->[$i++]);
183             }
184 0           @{$self->{co_cnt_}} = @aco_cnt_;
  0            
185 0 0         if(@{$gdt->{bedBtr}}){
  0            
186 0           my @abtr_cnt; # array background treatment counts.
187 0           $i = 0; # iterator for normalization constants.
188 0           foreach my $b(@{$gdt->{bedBtr}}) {
  0            
189 0           push @abtr_cnt, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end}) / $gdt->{normBtr}->[$i++]);
190             }
191 0           @{$self->{btr_cnt}} = @abtr_cnt;
  0            
192             }
193 0 0         if(@{$gdt->{bedBco}}){
  0            
194 0           my @abco_cnt; # array background treatment counts.
195 0           $i = 0; # iterator for normalization constants.
196 0           foreach my $b(@{$gdt->{bedBco}}) {
  0            
197 0           push @abco_cnt, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end}) / $gdt->{normBco}->[$i++]);
198             }
199 0           @{$self->{bco_cnt}} = @abco_cnt;
  0            
200             }
201 0 0         if(@{$self->{btr_cnt}} == 0){
  0            
202 0           @{$self->{btr_cnt}} = @{$self->{bco_cnt}};
  0            
  0            
203             }
204 0 0         if(@{$self->{bco_cnt}} == 0){
  0            
205 0           @{$self->{bco_cnt}} = @{$self->{btr_cnt}};
  0            
  0            
206             }
207             }
208             }
209              
210             # Retrieve sum of raw read counts for the region. Override parent function.
211             sub retrieve_raw_sum{
212 0     0 0   my($self,$gdt) = @_;
213 0           $self->{rsumTr} = 0;
214 0           foreach my $b(@{$gdt->{bedTr}}) {
  0            
215 0           $self->{rsumTr} += $b->get_region_count($self->{chrom}, $self->{start}, $self->{end});
216             }
217 0           $self->{rsumCo} = 0;
218 0           foreach my $b(@{$gdt->{bedCo}}) {
  0            
219 0           $self->{rsumCo} += $b->get_region_count($self->{chrom}, $self->{start}, $self->{end});
220             }
221             }
222              
223             # Determine whether a window is within gap.
224             sub isWithinGap{
225 0     0 0   my($self,$gdt,$rwin) = @_;
226 0 0         return 0 if !defined $self->{chrom};
227 0           my $winSta = $rwin->{winN}*$gdt->{step}+1;
228 0   0       return ($winSta - $self->{end}-1 <= $gdt->{gap} and $rwin->{dirn} eq $self->{dirn});
229             }
230              
231              
232             # Class for a sliding window of fixed size.
233             package diffReps::SlideWindow;
234              
235 1     1   7 use strict;
  1         2  
  1         37  
236 1     1   1021 use Statistics::TTest;
  1         41125  
  1         33  
237 1     1   741 use MyBioinfo::NBTest;
  1         3  
  1         60  
238 1     1   7 use MyBioinfo::Common;
  1         2  
  1         104  
239 1     1   1058 use MyBioinfo::Math qw( gtest_gof chisqtest_gof );
  1         4  
  1         82  
240 1     1   8 use MyShortRead::SRBed;
  1         2  
  1         2758  
241              
242             # Constructor.
243             sub new{
244 0     0     my $class = shift;
245 0           my $self = {
246             chrom => undef,
247             winN => undef, # window number to retrieve count from SRBed class.
248             maxN => undef, # maximum window number.
249             retrieved => 0, # boolean tag: whether count data have been retrieved?
250             tr_cnt => [], # normalized treatment counts.
251             co_cnt => [], # normalized control counts.
252             tr_cnt_ => [], # normalized treatment counts using #reads.
253             co_cnt_ => [], # normalized control counts using #reads.
254             btr_cnt => [], # normalized background treatment counts.
255             bco_cnt => [], # normalized background control counts.
256             rsumTr => undef, # sum of raw treatment counts.
257             rsumCo => undef, # sum of raw control counts.
258             dirn => undef, # change direction: 'Up' or 'Down'.
259             logFC => undef, # log2 fold change.
260             pval => undef # P-value.
261             };
262 0           bless $self, $class;
263 0           return $self;
264             }
265              
266             # Set the current chromosome name, max window number and reset everything.
267             # Obtain counts and sums for the first window.
268             sub set_chrom{
269 0     0     my($self,$gdt,$chrom,$maxN) = @_;
270 0           $self->{chrom} = $chrom;
271 0           $self->{maxN} = $maxN;
272 0           $self->{winN} = 0;
273 0           $self->reset_content;
274             #$self->{retrieved} = 0;
275             # $self->retrieve_norm_cnt($gdt);
276             # $self->retrieve_raw_sum($gdt);
277             #$self->{trEnr} = undef;
278             #$self->{coEnr} = undef;
279             # $self->{dirn} = undef; # change direction: 'Up' or 'Down'.
280             # $self->{logFC} = undef; # log2 fold change.
281             # $self->{pval} = undef; # P-value.
282             }
283              
284             # Reset window number.
285             sub reset_winN{
286 0     0     my $self = shift;
287 0           $self->{winN} = 0;
288             }
289              
290             # Reset window/region content.
291             sub reset_content{
292 0     0     my($self,$keepdi) = @_;
293 0           $self->{retrieved} = 0; # boolean tag: whether count data have been retrieved?
294 0           $self->{tr_cnt} = []; # normalized treatment counts.
295 0           $self->{co_cnt} = []; # normalized control counts.
296 0           $self->{tr_cnt_} = []; # normalized treatment counts using #reads.
297 0           $self->{co_cnt_} = []; # normalized control counts using #reads.
298 0           $self->{btr_cnt} = []; # normalized background treatment counts.
299 0           $self->{bco_cnt} = []; # normalized background control counts.
300 0           $self->{rsumTr} = undef; # sum of raw treatment counts.
301 0           $self->{rsumCo} = undef; # sum of raw control counts.
302 0 0         $self->{dirn} = undef unless $keepdi; # change direction: 'Up' or 'Down'.
303 0           $self->{logFC} = undef; # log2 fold change.
304 0           $self->{pval} = undef; # P-value.
305             }
306              
307             # Retrieve norm/raw count given current window/region location.
308             sub retrieve_new_cnt{
309 0     0     my($self,$gdt) = @_;
310 0           $self->{retrieved} = 1;
311 0           $self->retrieve_norm_cnt($gdt);
312 0           $self->retrieve_raw_sum($gdt);
313             }
314              
315             # Shift the window one position to the right and obtain new counts and sums.
316             sub move_forward{
317 0     0     my($self,$gdt) = @_;
318 0 0         if($self->{winN} <= $self->{maxN}-1) {
  0            
319 0           $self->{winN}++;
320 0           $self->reset_content;
321             # $self->retrieve_norm_cnt($gdt);
322             # $self->retrieve_raw_sum($gdt);
323             #$self->{trEnr} = undef;
324             #$self->{coEnr} = undef;
325             #$self->{dirn} = undef; # change direction: 'Up' or 'Down'.
326             #$self->{logFC} = undef; # log2 fold change.
327             #$self->{pval} = undef; # P-value.
328 0           return $self->{winN};
329             }
330             # elsif($self->{winN} == $self->{maxN}-1){
331             # $self->{winN} = $self->{maxN};
332             # $self->{tr_cnt} = [];
333             # $self->{co_cnt} = [];
334             # $self->{tr_cnt_} = [];
335             # $self->{co_cnt_} = [];
336             # $self->{btr_cnt} = [];
337             # $self->{bco_cnt} = [];
338             # #$self->{trEnr} = undef;
339             # #$self->{coEnr} = undef;
340             # $self->{dirn} = undef; # change direction: 'Up' or 'Down'.
341             # $self->{logFC} = undef; # log2 fold change.
342             # $self->{pval} = undef; # P-value.
343             # return $self->{winN};
344             # }
345             else {return $self->{maxN};}
346             # Max valid window index should be maxN-1.
347             }
348              
349             # Return current window number.
350             sub get_winN{
351 0     0     my $self = shift;
352 0 0         if($self->{winN} < $self->{maxN}) {return $self->{winN};}
  0            
  0            
353             else {return -1;}
354             }
355              
356             # Return chromosome name.
357             sub get_chrom{
358 0     0     my $self = shift;
359 0           return $self->{chrom};
360             }
361              
362             # Get normalized read counts for external procedures.
363             sub get_cnt_tr{
364 0     0     my $self = shift;
365 0           return @{$self->{tr_cnt}};
  0            
366             }
367             sub get_cnt_co{
368 0     0     my $self = shift;
369 0           return @{$self->{co_cnt}};
  0            
370             }
371             sub get_cnt_tr_{
372 0     0     my $self = shift;
373 0           return @{$self->{tr_cnt_}};
  0            
374             }
375             sub get_cnt_co_{
376 0     0     my $self = shift;
377 0           return @{$self->{co_cnt_}};
  0            
378             }
379             sub get_cnt_btr{
380 0     0     my $self = shift;
381 0           return @{$self->{btr_cnt}};
  0            
382             }
383             sub get_cnt_bco{
384 0     0     my $self = shift;
385 0           return @{$self->{bco_cnt}};
  0            
386             }
387              
388             # Get differential analysis info.
389             sub get_diff_info{
390 0     0     my $self = shift;
391             return {
392 0           dirn => $self->{dirn},
393             logFC => $self->{logFC},
394             pval => $self->{pval}
395             }; # anonymous hash.
396             }
397              
398             # Retrieve normalized read counts by current window number from SRBed objects.
399             sub retrieve_norm_cnt{
400 0     0     my($self,$gdt) = @_;
401 0           my @atr_cnt; # array treatment counts.
402 0           my $i = 0; # iterator for normalization constants.
403 0           foreach my $b(@{$gdt->{bedTr}}) {
  0            
404 0           push @atr_cnt, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normTr}->[$i++]);
405             }
406 0           @{$self->{tr_cnt}} = @atr_cnt;
  0            
407 0           my @aco_cnt; # array control counts.
408 0           $i = 0;
409 0           foreach my $b(@{$gdt->{bedCo}}) {
  0            
410 0           push @aco_cnt, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normCo}->[$i++]);
411             }
412 0           @{$self->{co_cnt}} = @aco_cnt;
  0            
413             # Retrieve norm count for background if needed.
414 0 0         if($gdt->{bkgEnr}){
415 0           my @atr_cnt_; # array treatment counts.
416 0           $i = 0; # iterator for normalization constants.
417 0           foreach my $b(@{$gdt->{bedTr}}) {
  0            
418 0           push @atr_cnt_, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normTr_}->[$i++]);
419             }
420 0           @{$self->{tr_cnt_}} = @atr_cnt_;
  0            
421 0           my @aco_cnt_; # array treatment counts.
422 0           $i = 0; # iterator for normalization constants.
423 0           foreach my $b(@{$gdt->{bedCo}}) {
  0            
424 0           push @aco_cnt_, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normCo_}->[$i++]);
425             }
426 0           @{$self->{co_cnt_}} = @aco_cnt_;
  0            
427 0 0         if(@{$gdt->{bedBtr}}){
  0            
428 0           my @abtr_cnt; # array background treatment counts.
429 0           $i = 0; # iterator for normalization constants.
430 0           foreach my $b(@{$gdt->{bedBtr}}) {
  0            
431 0           push @abtr_cnt, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normBtr}->[$i++]);
432             }
433 0           @{$self->{btr_cnt}} = @abtr_cnt;
  0            
434             }
435 0 0         if(@{$gdt->{bedBco}}){
  0            
436 0           my @abco_cnt; # array background treatment counts.
437 0           $i = 0; # iterator for normalization constants.
438 0           foreach my $b(@{$gdt->{bedBco}}) {
  0            
439 0           push @abco_cnt, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normBco}->[$i++]);
440             }
441 0           @{$self->{bco_cnt}} = @abco_cnt;
  0            
442             }
443 0 0         if(@{$self->{btr_cnt}} == 0){
  0            
444 0           @{$self->{btr_cnt}} = @{$self->{bco_cnt}};
  0            
  0            
445             }
446 0 0         if(@{$self->{bco_cnt}} == 0){
  0            
447 0           @{$self->{bco_cnt}} = @{$self->{btr_cnt}};
  0            
  0            
448             }
449             }
450             }
451              
452             # Retrieve sum of raw read counts by current window number from SRBed objects.
453             sub retrieve_raw_sum{
454 0     0     my($self,$gdt) = @_;
455 0           $self->{rsumTr} = 0;
456 0           foreach my $b(@{$gdt->{bedTr}}) {
  0            
457 0           $self->{rsumTr} += $b->get_win_count($self->{chrom}, $self->{winN});
458             }
459 0           $self->{rsumCo} = 0;
460 0           foreach my $b(@{$gdt->{bedCo}}) {
  0            
461 0           $self->{rsumCo} += $b->get_win_count($self->{chrom}, $self->{winN});
462             }
463             }
464              
465             # Determine whether current bin counts are above cutoff or not.
466             sub isAboveCutoff{
467 0     0     my($self,$gdt) = @_;
468 0           die "Empty SRBed vectors encountered in function: isAboveCutoff.\n"
469 0 0 0       if @{$gdt->{bedTr}} == 0 or @{$gdt->{bedCo}} == 0;
  0            
470 0           my $tagTr = 1;
471 0           my $tagCo = 1;
472 0 0 0       if($gdt->{bkgEnr} and $gdt->{useBkg} > 0){ # use background as filter.
473 0 0         if(!$self->{retrieved}){ # retrieve new count if not done so.
474 0           $self->retrieve_new_cnt($gdt);
475             }
476 0           my $btr_m = mean(@{$self->{btr_cnt}});
  0            
477 0           my $bco_m = mean(@{$self->{bco_cnt}});
  0            
478 0           foreach my $c(@{$self->{tr_cnt_}}){
  0            
479 0 0 0       if(!$c or ($c and $btr_m and $c / $btr_m <= $gdt->{useBkg})){
      0        
      0        
480 0           $tagTr = 0;
481 0           last;
482             }
483             }
484 0 0         if(!$tagTr){
485 0           foreach my $c(@{$self->{co_cnt_}}){
  0            
486 0 0 0       if(!$c or ($c and $bco_m and $c / $bco_m <= $gdt->{useBkg})){
      0        
      0        
487 0           $tagCo = 0;
488 0           last;
489             }
490             }
491             }
492             }else{ # use raw count and equation as filter.
493 0           my $i = 0;
494 0           foreach my $b(@{$gdt->{bedTr}}){
  0            
495 0           my $rawcnt = $b->get_win_count($self->{chrom}, $self->{winN});
496 0 0 0       if(!$rawcnt or $rawcnt <= $gdt->{meanTr}[$i] + $gdt->{nsd}*$gdt->{stdTr}[$i]){
497 0           $tagTr = 0;
498 0           last;
499             }
500 0           $i++;
501             }
502 0 0         if(!$tagTr){
503 0           $i = 0;
504 0           foreach my $b(@{$gdt->{bedCo}}){
  0            
505 0           my $rawcnt = $b->get_win_count($self->{chrom}, $self->{winN});
506 0 0 0       if(!$rawcnt or $rawcnt <= $gdt->{meanCo}[$i] + $gdt->{nsd}*$gdt->{stdCo}[$i]){
507 0           $tagCo = 0;
508 0           last;
509             }
510 0           $i++;
511             }
512             }
513             }
514 0   0       return ($tagTr || $tagCo);
515             }
516              
517             # Calculate statistical scores based on current window counts.
518             sub calc_stat{
519 0     0     my($self,$gdt,$meth,$eps) = @_;
520             #return $self->{pval} if defined $self->{pval}; # no duplicated calculation please.
521 0 0         return 1 if !defined($self->{chrom}); # do nothing if the region is not defined.
522 0 0         if(!$self->{retrieved}){ # retrieve new count if not done so.
523 0           $self->retrieve_new_cnt($gdt);
524             }
525             # Check whether counts and sums are ready for stat test.
526 0           die "Empty count vectors encountered when doing stat test.\n"
527 0 0 0       if @{$self->{tr_cnt}} == 0 or @{$self->{co_cnt}} == 0;
  0            
528 0 0 0       die "Undefined raw sums for treatment or control when doing stat test.\n"
529             if !defined($self->{rsumTr}) or !defined($self->{rsumCo});
530             # Noramlized mean count for each group.
531 0           my $tr_m = mean(@{$self->{tr_cnt}});
  0            
532 0           my $co_m = mean(@{$self->{co_cnt}});
  0            
533             # Special case: zero count for both groups.
534 0 0 0       if($tr_m == 0 and $co_m == 0) {
535 0           $self->{logFC} = 0;
536 0           $self->{dirn} = 'Nons';
537 0           $self->{pval} = 1;
538             }
539             else {
540 0 0         if($co_m == 0) {$self->{logFC} = INFINITE;}
  0            
  0            
541             else {$self->{logFC} = log2($tr_m/$co_m);}
542 0 0         $self->{dirn} = $self->{logFC} > 0? 'Up' : 'Down';
543             # Perform statistical tests to find P-value.
544 0 0 0       if($meth eq 'tt'){ # T-test.
    0          
    0          
545 0           my $ttest = new Statistics::TTest;
546 0           $ttest->load_data(\@{$self->{tr_cnt}}, \@{$self->{co_cnt}});
  0            
  0            
547 0           $self->{pval} = $ttest->t_prob;
548             }
549             elsif($meth eq 'nb'){ # Negative Binomial test.
550 0           my $tr_var = var(@{$self->{tr_cnt}});
  0            
551 0           my $co_var = var(@{$self->{co_cnt}});
  0            
552              
553 0           my $tr_raw_m = raw_sum_mean($tr_m,$co_m,$gdt->{normTr});
554 0           my $co_raw_m = raw_sum_mean($tr_m,$co_m,$gdt->{normCo});
555 0           my $tr_raw_v = raw_sum_var($tr_var,$tr_m,$co_m,$gdt->{normTr},$tr_raw_m,$eps);
556 0           my $co_raw_v = raw_sum_var($co_var,$tr_m,$co_m,$gdt->{normCo},$co_raw_m,$eps);
557              
558 0           $self->{pval} = nb_pval($self->{rsumTr},$self->{rsumCo},$tr_raw_m,$tr_raw_v,$co_raw_m,$co_raw_v,$eps);
559             }
560             elsif($meth eq 'gt' or $meth eq 'cs'){
561 0           my $tr_sum = sum(@{$self->{tr_cnt}});
  0            
562 0           my $co_sum = sum(@{$self->{co_cnt}});
  0            
563 0           my $a_cnt = [$tr_sum, $co_sum]; # ref to array of normalized counts.
564 0           my $tr_rep = scalar @{$self->{tr_cnt}};
  0            
565 0           my $co_rep = scalar @{$self->{co_cnt}};
  0            
566 0           my $tot_rep = $tr_rep + $co_rep;
567 0           my $a_p = [$tr_rep/$tot_rep, $co_rep/$tot_rep]; # ref to array of probabilities.
568 0           my @test_res; # result vector: statistic, DOF, p-value.
569 0 0         if($meth eq 'gt'){
570 0           @test_res = gtest_gof($a_cnt, $a_p);
571             }else{
572 0           @test_res = chisqtest_gof($a_cnt, $a_p);
573             }
574 0           $self->{pval} = $test_res[2];
575             }
576             else{
577 0           die "Undefined statistical test! Halt execution.\n";
578             }
579             }
580 0           return $self->{pval};
581             }
582              
583              
584             # Subroutines for NB test. Copied from Common.pm. Written by Ying Jin.
585             # NOTES: In the original diffPermute program, norm constants were multiplied to
586             # raw counts to get normalized values. This only saves ignorable execution time
587             # but causes confusion. They are now reversed, so do the following subroutines.
588             sub raw_sum_mean{
589 0     0     my ($m1,$m2,$norm_ref)=@_;
590 0           my $v = 0;
591 0           for my $i(@{$norm_ref}){
  0            
592 0           $v += $i;
593             }
594            
595 0           return $v*($m1+$m2)/2;
596             }
597             sub raw_sum_var{
598 0     0     my ($base_var,$m1,$m2,$norm,$raw_mean,$eps)=@_;
599 0           my $base_mean = ($m1+$m2)/2;
600 0           my $z =0;
601 0           my $s = 0;
602 0           for my $i(@{$norm}){
  0            
603 0           $z += 1/$i;
604 0           $s += $i**2;
605             }
606 0           $z = $z/@{$norm};
  0            
607            
608 0           my $var = $base_var - $z * $base_mean;
609 0 0         if($var < $eps*$base_mean){
610 0           $var = $eps*$base_mean;
611             }
612            
613 0           return $var*$s + $raw_mean;
614            
615             }
616             ###################################
617              
618              
619              
620             # Class for managing a list of significant regions: add, adjust P-values, output.
621             package diffReps::RegionList;
622 1     1   10 use strict;
  1         2  
  1         41  
623 1     1   6 use MyBioinfo::Common;
  1         2  
  1         157  
624 1     1   7 use diffReps::DiffRes qw( cmpsite );
  1         2  
  1         979  
625              
626             # Constructor.
627             sub new{
628 0     0     my $class = shift;
629 0           my $self = {
630             regList => [], # region list.
631             adjusted => 0 # bool tag for P-value adjustment.
632             };
633 0           bless $self, $class;
634 0           return $self;
635             }
636              
637             # Add a significant region into list if the P-value is defined.
638             sub add{
639 0     0     my($self,$rreg) = @_;
640 0 0         if(defined $rreg->{pval}){
641 0           push @{$self->{regList}}, $rreg->dcopy();
  0            
642             }
643             }
644              
645             # Add a significant region into list if the P-value is defined.
646             # Delete the original region.
647             sub add_del{
648 0     0     my($self,$rreg) = @_;
649 0 0         if(defined $rreg->{pval}){
650 0           push @{$self->{regList}}, $rreg->dcopy();
  0            
651 0           %{$rreg} = %{new diffReps::ChromaModSite};
  0            
  0            
652             }
653             }
654              
655             # Append another region list to the end.
656             sub append_list{
657 0     0     my($self,$rl) = @_;
658 0 0         if(@{$rl->{'regList'}}){
  0            
659 0           push @{$self->{'regList'}}, @{$rl->{'regList'}};
  0            
  0            
660 0           $self->{'adjusted'} = 0;
661             }
662             }
663              
664             # Sort all differential sites according to genomic coordinates.
665             sub sort{
666 0     0     my $self = shift;
667 0           @{$self->{regList}} = sort cmpsite @{$self->{regList}};
  0            
  0            
668             }
669              
670             # Adjust P-values for all regions in the list.
671             sub adjPval{
672 0     0     my $self = shift;
673 0 0         return if $self->{adjusted};
674 0           my $N; # the 'N' in BH formula.
675 0 0         if(@_ > 0) {$N = shift;}
  0            
  0            
676 0           else {$N = @{$self->{regList}};}
677             # Extract all P-values from the list and feed them to an adjustment procedure.
678 0           my(@p1, @p2); # pair of region and window P-values.
679 0           foreach my $r(@{$self->{regList}}) {
  0            
680 0           push @p1, $r->{pval};
681 0           push @p2, $r->{winP};
682             }
683 0           my @q1 = padjBH(\@p1, $N);
684 0           my @q2 = padjBH(\@p2, $N);
685 0           my $i = 0; # iterator for adjusted P-value vector.
686 0           foreach my $r(@{$self->{regList}}) {
  0            
687 0           $r->{padj} = $q1[$i];
688 0           $r->{winQ} = $q2[$i++];
689             }
690             # Set bool tag.
691 0           $self->{adjusted} = 1;
692             }
693              
694             # Print header line to file handle.
695             sub gen_header{
696 0     0     my($self,$hrep) = @_;
697             # Print header line.
698 0           print $hrep "Chrom\tStart\tEnd\tLength\tTreatment.cnt\tControl.cnt\tTreatment.avg\tControl.avg\tTreatment.enr\tControl.enr\tEvent\tlog2FC\tpval\tpadj\twinSta\twinEnd\twinFC\twinP\twinQ\n";
699             }
700              
701             # Output all formatted regions.
702             sub output{
703 0     0     my($self,$gdt,$h) = @_;
704 0 0         if(!$self->{adjusted}) {$self->adjPval();}
  0            
705 0           foreach my $r(@{$self->{regList}}) {$r->print_reg($gdt,$h);}
  0            
  0            
706             }
707              
708              
709              
710              
711             # Preloaded methods go here.
712              
713             # Autoload methods go after =cut, and are processed by the autosplit program.
714              
715             1;
716             __END__