File Coverage

blib/lib/MyShortRead/SRBed.pm
Criterion Covered Total %
statement 12 169 7.1
branch 0 60 0.0
condition 0 6 0.0
subroutine 4 24 16.6
pod 0 20 0.0
total 16 279 5.7


line stmt bran cond sub pod time code
1             # SRBed - A class for manipulation of a short read dataset in BED file.
2             # Bins - are non-overlapping windows that span the whole genome.
3             # Windows - are sliding windows with fixed size. A sliding step equals the bin size.
4             # Sliding window: when implementing a sliding window procedure, the whole genome is
5             # first separated into non-overlapping bins of the size of the sliding step. Then a
6             # window count is calculated for a continuous block of bins.
7             # window_size must be multiple times of bin_size, i.e. window_size % bin_size = 0.
8              
9              
10 1     1   8 use strict;
  1         2  
  1         44  
11 1     1   31 use 5.006;
  1         14  
  1         67  
12             package MyShortRead::SRBed;
13             our $VERSION = '1.00';
14              
15 1     1   588 use MyShortRead::MyShortRead qw(sep_chrom_bed);
  1         3  
  1         92  
16 1     1   601 use MyShortRead::ChromBed;
  1         2  
  1         2027  
17              
18             # Class constructor.
19             sub new{
20 0     0 0   my $class = shift;
21 0           my $self = {};
22 0           $self->{file_name} = undef;
23 0           $self->{bin_size} = 1000;
24 0           $self->{window_size} = 1000;
25 0           $self->{frag_size} = 100;
26 0           $self->{chr_list} = {}; # hash table: chrom -> ChromBed
27 0           bless($self, $class);
28              
29             # User supplied bed file name.
30 0 0         if(@_ >= 1) {$self->{file_name} = $_[0];}
  0            
31 0 0         if(@_ >= 2) {$self->{bin_size} = $_[1];}
  0            
32 0 0         if(@_ >= 3){$self->{frag_size} = $_[2];}
  0            
33              
34 0           return $self;
35             }
36              
37             sub set_file_name{
38 0     0 0   my $self = shift;
39 0 0         if(@_ >= 1) {$self->{file_name} = $_[0];}
  0            
40             }
41              
42             sub set_bin_size{
43 0     0 0   my $self = shift;
44 0 0         if(@_ >= 1) {$self->{bin_size} = $_[0];}
  0            
45             }
46              
47             sub set_window_size{
48 0     0 0   my $self = shift;
49 0 0         if(@_ >= 1) {$self->{window_size} = $_[0];}
  0            
50             }
51              
52             sub set_frag_size{
53 0     0 0   my $self = shift;
54 0 0         if(@_ >= 1) {$self->{frag_size} = $_[0];}
  0            
55             }
56              
57             # Create hash table of chrom name to ChromBed object given a chrlen hash table.
58             sub create_chr_list{
59 0     0 0   my $self = shift;
60 0           my $rtbl = $_[0]; # chrlen hash table.
61             # Separate the whole bed file into trunks of chromosomes.
62 0           my($nread, $hsh_chrfile) = sep_chrom_bed($self->{file_name}, keys %{$rtbl});
  0            
63             # For each chromosome, create a ChromBed object.
64 0           while(my($chrom,$chrfile) = each %{$hsh_chrfile}){
  0            
65 0           my $chrlen = $rtbl->{$chrom};
66 0           my $chrbed = new MyShortRead::ChromBed($chrom,$chrlen,$chrfile);
67             # $chrbed->do_bin_count($self->{bin_size},$self->{frag_size});
68 0           $self->{chr_list}{$chrom} = $chrbed; # add the new obj to hash table.
69             }
70 0           return $nread;
71             }
72              
73             # Do bin count for one chromosome.
74             sub chr_bin_count{
75 0     0 0   my $self = shift;
76 0           my $chrom = shift;
77 0 0         if(exists $self->{chr_list}{$chrom}){
  0            
78 0           $self->{chr_list}{$chrom}->do_bin_count($self->{bin_size},$self->{frag_size});
79             }
80             else {warn "Chromosome name not exists! Bin count not done.\n";}
81             }
82              
83             # Do bin count for one chromosome with direction.
84             sub chr_bin_count_direction{
85 0     0 0   my $self = shift;
86 0           my $chrom = shift;
87 0 0         if(exists $self->{chr_list}{$chrom}){
  0            
88 0           $self->{chr_list}{$chrom}->do_bin_count_direction($self->{bin_size},$self->{frag_size});
89             }
90             else {warn "Chromosome name not exists! Bin count not done.\n";}
91             }
92              
93             # Smooth bin count signals for one chromosome.
94             sub chr_bin_smooth{
95 0     0 0   my $self = shift;
96 0 0         if(@_ < 3){
97 0           warn "Not enough arguments: chromosome name, smooth width and function string must be specified.\n";
98 0           return;
99             }
100 0           my($chrom,$smooth_width,$fun_str) = @_;
101 0 0         if(exists $self->{chr_list}{$chrom}){
  0            
102 0           $self->{chr_list}{$chrom}->smooth_bin_count($smooth_width,$fun_str);
103             }
104             else {warn "Chromosome name not exists! Bin smooth not done.\n";}
105             }
106              
107             # Calculate sliding window count for one chromosome.
108             sub chr_win_count{
109 0     0 0   my $self = shift;
110 0           my $chrom = shift;
111 0           my $bins_per_window = $self->{window_size} / $self->{bin_size};
112 0 0         if(exists $self->{chr_list}{$chrom}){
  0            
113 0 0         if(defined $self->{chr_list}{$chrom}->{bin_num}){
  0            
114 0           $self->{chr_list}{$chrom}->create_window_count($bins_per_window);
115             }
116             else {warn "Bin count vector must be created before window count.\n";}
117             }
118             else {warn "Chromosome name not exists! Window count not done.\n";}
119             }
120              
121             # Calculate sliding window count for one chromosome with direction.
122             sub chr_win_count_direction{
123 0     0 0   my $self = shift;
124 0           my $chrom = shift;
125 0           my $bins_per_window = $self->{window_size} / $self->{bin_size};
126 0 0         if(exists $self->{chr_list}{$chrom}){
  0            
127 0 0         if(defined $self->{chr_list}{$chrom}->{bin_num}){
  0            
128 0           $self->{chr_list}{$chrom}->create_window_count_direction($bins_per_window);
129             }
130             else {warn "Bin count vector must be created before window count.\n";}
131             }
132             else {warn "Chromosome name not exists! Window count not done.\n";}
133             }
134              
135             # Delete bin count vector for specified chromosome.
136             sub del_bin_count{
137 0     0 0   my $self = shift;
138 0           my $chrom = shift;
139 0 0         if(exists $self->{chr_list}{$chrom}){
140 0           $self->{chr_list}{$chrom}->{bin_count} = [];
141 0           $self->{chr_list}{$chrom}->{bin_count_F} = [];
142 0           $self->{chr_list}{$chrom}->{bin_count_R} = [];
143             }
144             }
145              
146             # Delete window count vector for specified chromosome.
147             sub del_win_count{
148 0     0 0   my $self = shift;
149 0           my $chrom = shift;
150 0 0         if(exists $self->{chr_list}{$chrom}){
151 0           $self->{chr_list}{$chrom}->{window_count} = [];
152 0           $self->{chr_list}{$chrom}->{window_count_F} = [];
153 0           $self->{chr_list}{$chrom}->{window_count_R} = [];
154             }
155             }
156              
157             # Return read count given chrom name and bin position.
158             sub get_bin_count{
159 0     0 0   my $self = shift;
160 0           my($chrom,$bin_pos) = @_;
161 0 0         if(exists $self->{chr_list}{$chrom}){
162 0           my $r_chrbed = $self->{chr_list}{$chrom};
163 0 0         if($bin_pos < $r_chrbed->{bin_num}){
164 0           return $r_chrbed->{bin_count}->[$bin_pos];
165             }
166             else{
167 0           my $bin_num = $r_chrbed->{bin_num};
168 0           warn "Chrom: $chrom, Bin position:$bin_pos exceeds boundary:$bin_num! Bin count zero returned.\n";
169 0           return 0;
170             }
171             }
172             else{
173 0           warn "Cannot find chrom name! Bin count zero returned.\n";
174 0           return 0;
175             }
176             }
177              
178             # Return read count with direction given chrom name and bin position.
179             sub get_bin_count_direction{
180 0     0 0   my $self = shift;
181 0           my($chrom,$bin_pos) = @_;
182 0 0         if(exists $self->{chr_list}{$chrom}){
183 0           my $r_chrbed = $self->{chr_list}{$chrom};
184 0 0         if($bin_pos < $r_chrbed->{bin_num}){
185 0           return ($r_chrbed->{bin_count_F}->[$bin_pos], $r_chrbed->{bin_count_R}->[$bin_pos]);
186             }
187             else{
188 0           my $bin_num = $r_chrbed->{bin_num};
189 0           warn "Chrom: $chrom, Bin position:$bin_pos exceeds boundary:$bin_num! Bin count zero returned.\n";
190 0           return 0;
191             }
192             }
193             else{
194 0           warn "Cannot find chrom name! Bin count zero returned.\n";
195 0           return 0;
196             }
197             }
198              
199             # Return read count given chrom name and window position.
200             sub get_win_count{
201 0     0 0   my $self = shift;
202 0           my($chrom,$win_pos) = @_;
203 0 0         if(exists $self->{chr_list}{$chrom}){
204 0           my $r_chrbed = $self->{chr_list}{$chrom};
205 0 0         if($win_pos < $r_chrbed->{window_num}){
206 0           return $r_chrbed->{window_count}[$win_pos];
207             }
208             else{
209 0           warn "Window position exceeds boundary! Window count zero returned.\n";
210 0           return 0;
211             }
212             }
213             else{
214 0           warn "Cannot find chrom name! Window count zero returned.\n";
215 0           return 0;
216             }
217             }
218              
219             # Return read count given chrom name and window position.
220             sub get_win_count_direction{
221 0     0 0   my $self = shift;
222 0           my($chrom,$win_pos) = @_;
223 0 0         if(exists $self->{chr_list}{$chrom}){
224 0           my $r_chrbed = $self->{chr_list}{$chrom};
225 0 0         if($win_pos < $r_chrbed->{window_num}){
226 0           return ($r_chrbed->{window_count_F}[$win_pos], $r_chrbed->{window_count_R}[$win_pos]);
227             }
228             else{
229 0           warn "Window position exceeds boundary! Window count zero returned.\n";
230 0           return 0;
231             }
232             }
233             else{
234 0           warn "Cannot find chrom name! Window count zero returned.\n";
235 0           return 0;
236             }
237             }
238              
239             # Return read count given a genomic region's coordinates.
240             sub get_region_count{
241 0     0 0   my $self = shift;
242 0           my($chrom,$start,$end) = @_;
243 0 0 0       if(($start-1) % $self->{bin_size} != 0 or $end % $self->{bin_size} != 0){
244 0           warn "start or end coordinate not on bin boundary. round to the nearest bin!\n";
245             }
246 0           my $bin_start = int(($start-1)/$self->{bin_size});
247 0           my $bin_end = int($end/$self->{bin_size}) - 1;
248 0           my $count = 0;
249 0 0         if($bin_start > $bin_end) {warn "Genomic region contains zero bin! Count zero returned.\n";}
  0            
  0            
250 0           else {for my $i($bin_start..$bin_end) {$count += $self->get_bin_count($chrom,$i);} }
251 0           return $count;
252             }
253              
254             # Return read count with direction given a genomic region's coordinates.
255             sub get_region_count_direction{
256 0     0 0   my $self = shift;
257 0           my($chrom,$start,$end) = @_;
258 0 0 0       if(($start-1) % $self->{bin_size} != 0 or $end % $self->{bin_size} != 0){
259 0           warn "start or end coordinate not on bin boundary. round to the nearest bin!\n";
260             }
261 0           my $bin_start = int(($start-1)/$self->{bin_size});
262 0           my $bin_end = int($end/$self->{bin_size}) - 1;
263 0           my @count = (0,0); # (Forward, Reverse).
264 0 0         if($bin_start > $bin_end) {warn "Genomic region contains zero bin! Count zero returned.\n";}
  0            
265             else {
266 0           for my $i($bin_start..$bin_end) {
267 0           my @bin_count = $self->get_bin_count_direction($chrom,$i);
268 0           $count[0] += $bin_count[0];
269 0           $count[1] += $bin_count[1];
270             }
271             }
272 0           return @count;
273             }
274              
275             # Delete all associated chromosome files but keep memory data.
276             sub del_all_chrfiles{
277 0     0 0   my $self = shift;
278 0 0         return if keys %{$self->{chr_list}} == 0;
  0            
279 0           while(my($chrom,$chrbed) = each %{$self->{chr_list}}){
  0            
280 0           $chrbed->del_file();
281             }
282             }
283              
284             1;
285              
286              
287             __END__