File Coverage

blib/lib/MyShortRead/ChromBed.pm
Criterion Covered Total %
statement 15 144 10.4
branch 0 46 0.0
condition 0 15 0.0
subroutine 5 13 38.4
pod 0 8 0.0
total 20 226 8.8


line stmt bran cond sub pod time code
1             #
2             # ChromBed - A class for extracting short read information for one chromosome.
3             #
4              
5 1     1   5 use strict;
  1         2  
  1         34  
6 1     1   26 use 5.006;
  1         3  
  1         49  
7             package MyShortRead::ChromBed;
8 1     1   5 use POSIX qw(ceil);
  1         2  
  1         8  
9 1     1   65 use MyShortRead::MyShortRead qw(bin_genome_count bin_genome_count_direction);
  1         2  
  1         48  
10 1     1   6 use MyBioinfo::Common qw(mean median);
  1         1  
  1         1590  
11             our $VERSION = '1.00';
12              
13             # Class constructor.
14             sub new{
15 0     0 0   my $class = shift;
16 0           my $self = {};
17 0           $self->{chrom_name} = undef;
18 0           $self->{chrom_len} = undef;
19 0           $self->{file_name} = undef; # chromosome file name.
20 0           $self->{bin_num} = undef; # number of bins for the chromosome.
21 0           $self->{bin_count} = []; # vector for bin read count.
22 0           $self->{bin_count_F} = []; # bin Forward strand read count.
23 0           $self->{bin_count_R} = []; # bin Reverse strand read count.
24 0           $self->{window_num} = undef; # number of sliding windows.
25 0           $self->{window_count} = []; # vector for window read count.
26 0           $self->{window_count_F} = []; # window read count for Forward strand.
27 0           $self->{window_count_R} = []; # window read count for Reverse strand.
28 0           bless($self, $class);
29              
30             # User supplied chromosome name, length and file name.
31 0 0         if(@_ >= 3){
32 0           $self->{chrom_name} = $_[0];
33 0           $self->{chrom_len} = $_[1];
34 0           $self->{file_name} = $_[2];
35             }
36              
37 0           return $self;
38             }
39              
40             # Set chromosome information: name, length and file name.
41             # Clean memory data.
42             sub set_chr_info{
43 0     0 0   my $self = shift;
44 0 0         if(@_ >= 3){
45 0           $self->{chrom_name} = $_[0];
46 0           $self->{chrom_len} = $_[1];
47 0           $self->{file_name} = $_[2];
48 0           $self->{bin_num} = undef;
49 0           $self->{bin_count} = [];
50             }
51             }
52              
53             # Create bin count vector for the specified chromosome file.
54             sub do_bin_count{
55 0     0 0   my $self = shift;
56 0 0         if(@_ < 2) {warn "Bin or fragment size not specified. Bin count not performed. Exit.\n"; return;}
  0            
  0            
57 0 0 0       if(!(defined $self->{chrom_name} and defined $self->{chrom_len} and defined $self->{file_name})){
      0        
58 0           warn "Chromosome information missing! Cannot continue. Exit.\n";
59 0           return;
60             }
61 0           my($bin_size,$frag_size) = @_;
62             # Read bin size and calculate bin number.
63 0           $self->{bin_num} = ceil($self->{chrom_len} / $bin_size);
64             # Create bin count vector.
65 0           bin_genome_count($self->{file_name},$bin_size,$self->{chrom_len},$frag_size,$self->{bin_count});
66             }
67              
68             # Create bin count with direction for the specified chromosome file.
69             sub do_bin_count_direction{
70 0     0 0   my $self = shift;
71 0 0         if(@_ < 2) {warn "Bin or fragment size not specified. Bin count not performed. Exit.\n"; return;}
  0            
  0            
72 0 0 0       if(!(defined $self->{chrom_name} and defined $self->{chrom_len} and defined $self->{file_name})){
      0        
73 0           warn "Chromosome information missing! Cannot continue. Exit.\n";
74 0           return;
75             }
76 0           my($bin_size,$frag_size) = @_;
77             # Read bin size and calculate bin number.
78 0           $self->{bin_num} = ceil($self->{chrom_len} / $bin_size);
79             # Create bin count vector.
80 0           bin_genome_count_direction($self->{file_name},$bin_size,$self->{chrom_len},$frag_size,$self->{bin_count_F},$self->{bin_count_R});
81             }
82              
83             # Smooth bin count signals by applying window functions.
84             sub smooth_bin_count{
85 0     0 0   my $self = shift;
86 0 0         if(@_ < 2) {warn "Smoothing width or function not specified. Exit.\n"; return;}
  0            
  0            
87 0 0         if(!defined $self->{bin_count}){
88 0           warn "Bin count vector not exists. Do bin count first! Exit.\n";
89 0           return;
90             }
91 0           my($smooth_width,$fun_str) = @_;
92 0 0         if($smooth_width <= 1){
93 0           warn "Smooth width must be larger than one. Exit.\n";
94 0           return;
95             }
96 0 0         if($smooth_width % 2 != 1){
97 0           warn "Smooth width must be odd number. Convert it to nearest odd number.\n";
98 0           $smooth_width++;
99             }
100 0           my $half = ($smooth_width-1)/2;
101 0 0         if($self->{bin_num} <= $half){
102 0           warn "Bin vector too small to calculate smoothed signals.\n";
103 0           return;
104             }
105 0           my @mov_arr; # Array containing bin counts of moving window.
106             # Build the initial moving array.
107 0           for my $i(0..$half) {push @mov_arr, $self->{bin_count}[$i];}
  0            
108 0 0         if($fun_str eq 'mean') {$self->{bin_count}[0] = mean(@mov_arr);}
  0 0          
  0            
109             elsif($fun_str eq 'median') {$self->{bin_count}[0] = median(@mov_arr);}
110             # Iteratively updating moving array and calculate smoothed signals.
111 0           for(my $i = 1; $i < $self->{bin_num}; $i++){
112 0 0         if($i - $half > 0) {shift @mov_arr;} # Remove the left element.
  0            
113 0 0         if($i + $half < $self->{bin_num}) {push @mov_arr, $self->{bin_count}[$i+$half];} # Add new element to the right.
  0            
114 0 0         if($fun_str eq 'mean') {$self->{bin_count}[$i] = mean(@mov_arr);}
  0 0          
  0            
115             elsif($fun_str eq 'median') {$self->{bin_count}[$i] = median(@mov_arr);}
116             }
117             }
118              
119             # Create sliding window count vector.
120             sub create_window_count{
121 0     0 0   my $self = shift;
122 0 0         if(@_ < 1) {warn "Number of bins per window not specified. Exit.\n"; return;}
  0            
  0            
123 0           my $bins_per_window = shift;
124 0 0         if(@{$self->{bin_count}} < $bins_per_window){
  0            
125 0           warn "Bin count vector not long enough to calculate window count.\n";
126 0           return;
127             }
128             # Initialize window count vector.
129 0           $self->{window_num} = $self->{bin_num} - $bins_per_window + 1;
130 0           $#{$self->{window_count}} = $self->{window_num} - 1;
  0            
131             # $mov_window_count is the moving window count.
132 0           my $mov_window_count = 0;
133 0           for my $i(0..$bins_per_window-1) {$mov_window_count += $self->{bin_count}[$i];}
  0            
134 0           $self->{window_count}[0] = $mov_window_count;
135             # $i is the index to current window.
136 0           for(my $i=1; $i < $self->{window_num}; $i++){
137 0           $mov_window_count = $mov_window_count - $self->{bin_count}[$i-1] + $self->{bin_count}[$i-1+$bins_per_window];
138 0           $self->{window_count}[$i] = $mov_window_count;
139             }
140             }
141              
142             # Create sliding window count vector with direction.
143             sub create_window_count_direction{
144 0     0 0   my $self = shift;
145 0 0         if(@_ < 1) {warn "Number of bins per window not specified. Exit.\n"; return;}
  0            
  0            
146 0           my $bins_per_window = shift;
147 0 0 0       if(@{$self->{bin_count_F}} < $bins_per_window or @{$self->{bin_count_R}} < $bins_per_window){
  0            
  0            
148 0           warn "Bin count vector not long enough to calculate window count.\n";
149 0           return;
150             }
151             # Initialize window count vector.
152 0           $self->{window_num} = $self->{bin_num} - $bins_per_window + 1;
153 0           $#{$self->{window_count_F}} = $self->{window_num} - 1;
  0            
154 0           $#{$self->{window_count_R}} = $self->{window_num} - 1;
  0            
155             # $mov_window_count is the moving window count.
156 0           my $mov_window_count_F = 0;
157 0           my $mov_window_count_R = 0;
158 0           for my $i(0..$bins_per_window-1) {$mov_window_count_F += $self->{bin_count_F}[$i];}
  0            
159 0           for my $i(0..$bins_per_window-1) {$mov_window_count_R += $self->{bin_count_R}[$i];}
  0            
160 0           $self->{window_count_F}[0] = $mov_window_count_F;
161 0           $self->{window_count_R}[0] = $mov_window_count_R;
162             # $i is the index to current window.
163 0           for(my $i=1; $i < $self->{window_num}; $i++){
164 0           $mov_window_count_F = $mov_window_count_F - $self->{bin_count_F}[$i-1] + $self->{bin_count_F}[$i-1+$bins_per_window];
165 0           $mov_window_count_R = $mov_window_count_R - $self->{bin_count_R}[$i-1] + $self->{bin_count_R}[$i-1+$bins_per_window];
166 0           $self->{window_count_F}[$i] = $mov_window_count_F;
167 0           $self->{window_count_R}[$i] = $mov_window_count_R;
168             }
169             }
170              
171             # Delete chromosome file but keep memory data.
172             sub del_file{
173 0     0 0   my $self = shift;
174 0 0         if(defined $self->{file_name}){
175 0           my $fn = $self->{file_name};
176 0 0         unlink $fn or warn "Cannot delete file $fn: $!\n";
177             }
178             }
179              
180             1;
181              
182             __END__