File Coverage

blib/lib/GenOO/RegionCollection/Type/DoubleHashArray.pm
Criterion Covered Total %
statement 82 108 75.9
branch 9 24 37.5
condition 1 3 33.3
subroutine 30 36 83.3
pod 0 17 0.0
total 122 188 64.8


line stmt bran cond sub pod time code
1             # POD documentation - main docs before the code
2              
3             =head1 NAME
4              
5             GenOO::RegionCollection::Type::DoubleHashArray - Object for a collection of GenOO::Region objects, with features
6              
7             =head1 SYNOPSIS
8              
9             # Object that manages a collection of GenOO::Region objects.
10              
11             # To initialize
12             my $locus_collection = GenOO::RegionCollection::DoubleHashArray->new({
13             name => undef,
14             species => undef,
15             description => undef,
16             extra => undef,
17             });
18              
19              
20             =head1 DESCRIPTION
21              
22             The primary data structure of this object is a 2D hash whose primary key
23             is the strand and its secondary key is the reference sequence name. Each
24             such pair of keys correspond to an array reference which stores objects of
25             the class L<GenOO::Region> sorted by start position.
26              
27             =head1 EXAMPLES
28              
29             # Get the records contained in a specific region
30             my @recs = $region_collection->records_contained_in_region(
31             1, 'chr3', 127726308, 127792250);
32              
33             # Get the longest record
34             my $longest_record = $region_collection->longest_record;
35              
36             =cut
37              
38             # Let the code begin...
39              
40             package GenOO::RegionCollection::Type::DoubleHashArray;
41             $GenOO::RegionCollection::Type::DoubleHashArray::VERSION = '1.5.2';
42              
43             #######################################################################
44             ####################### Load External modules #####################
45             #######################################################################
46 1     1   8 use Modern::Perl;
  1         2  
  1         31  
47 1     1   304 use autodie;
  1         4  
  1         15  
48 1     1   6125 use Moose;
  1         3  
  1         17  
49 1     1   7909 use namespace::autoclean;
  1         3  
  1         9  
50              
51              
52             #######################################################################
53             ######################### Load GenOO modules ######################
54             #######################################################################
55 1     1   884 use GenOO::Module::Search::Binary;
  1         3  
  1         34  
56 1     1   492 use GenOO::Data::Structure::DoubleHashArray;
  1         7  
  1         1912  
57              
58              
59             #######################################################################
60             ####################### Interface attributes ######################
61             #######################################################################
62             has 'name' => (
63             isa => 'Str',
64             is => 'rw'
65             );
66              
67             has 'species' => (
68             isa => 'Str',
69             is => 'rw'
70             );
71              
72             has 'description' => (
73             isa => 'Str',
74             is => 'rw'
75             );
76              
77             has 'longest_record' => (
78             is => 'ro',
79             builder => '_find_longest_record',
80             clearer => '_clear_longest_record',
81             init_arg => undef,
82             lazy => 1,
83             );
84              
85             has 'extra' => (
86             is => 'rw'
87             );
88              
89              
90             #######################################################################
91             ######################## Private attributes #######################
92             #######################################################################
93             has '_container' => (
94             is => 'ro',
95             builder => '_build_container',
96             init_arg => undef,
97             lazy => 1
98             );
99              
100             #######################################################################
101             ########################## Consumed Roles #########################
102             #######################################################################
103             with 'GenOO::RegionCollection';
104              
105              
106             #######################################################################
107             ######################## Interface Methods ########################
108             #######################################################################
109             sub add_record {
110 1768     1768 0 4091 my ($self, $record) = @_;
111 1768         57558 $self->_container->add_entry($record->strand, $record->rname, $record);
112 1768         4880 $self->_reset;
113             }
114              
115             sub all_records {
116 6     6 0 2292 my ($self) = @_;
117              
118 6         38 my @all_records;
119             $self->foreach_record_do(sub {
120 158     158   319 push @all_records, $_[0];
121 6         111 });
122              
123 6 100       124 return wantarray ? @all_records : \@all_records;
124             }
125              
126             sub foreach_record_do {
127 12     12 0 1308 my ($self, $block) = @_;
128 12         486 $self->_container->foreach_entry_do($block);
129             }
130              
131             sub foreach_record_on_rname_do {
132 1     1 0 1206 my ($self, $rname, $block) = @_;
133 1         44 $self->_container->foreach_entry_on_secondary_key_do($rname, $block);
134             }
135              
136             sub records_count {
137 14     14 0 13024 my ($self) = @_;
138 14         646 return $self->_container->entries_count;
139             }
140              
141             sub strands {
142 1     1 0 1221 my ($self) = @_;
143 1         44 return $self->_container->primary_keys();
144             }
145              
146             sub rnames_for_strand {
147 2     2 0 1910 my ($self, $strand) = @_;
148 2         86 return $self->_container->secondary_keys_for_primary_key($strand);
149             }
150              
151             sub rnames_for_all_strands {
152 1     1 0 1584 my ($self) = @_;
153 1         44 return $self->_container->secondary_keys_for_all_primary_keys();
154             }
155              
156             sub longest_record_length {
157 1     1 0 1328 my ($self) = @_;
158 1         48 return $self->longest_record->length;
159             }
160              
161             sub is_empty {
162 1     1 0 1202 my ($self) = @_;
163 1         46 return $self->_container->is_empty;
164             }
165              
166             sub is_not_empty {
167 1     1 0 1208 my ($self) = @_;
168 1         45 return $self->_container->is_not_empty;
169             }
170              
171             sub foreach_contained_record_do {
172 3     3 0 1451 my ($self, $strand, $chr, $start, $stop, $block) = @_;
173              
174 3         145 $self->_container->sort_entries;
175              
176             # Get a reference on the array containing the records of the specified strand and rname
177 3 50       24 my $records_ref = $self->_records_ref_for_strand_and_rname($strand, $chr) or return ();
178              
179             # Get index of record whose start is closest but greater than the start of the region.
180             my $index = GenOO::Module::Search::Binary->binary_search_for_value_greater_or_equal(
181 3     12   45 $start, $records_ref, sub {return $_[0]->start});
  12         311  
182 3 50       17 return 0 if !defined $index;
183              
184             # Scan records downstream of index for overlaps
185 3         13 while ($index < @$records_ref) {
186 6         11 my $record = $records_ref->[$index];
187 6 50       147 if ($record->stop <= $stop) {
188 6         16 my $return = $block->($record);
189 6 50 33     49 last if defined $return and $return eq 'break_loop';
190             }
191 6 50       155 last if $record->start > $stop; # no chance to find overlap after this
192 6         21 $index++;
193             }
194             }
195              
196             sub records_contained_in_region {
197 2     2 0 2112 my ($self, $strand, $chr, $start, $stop) = @_;
198              
199 2         5 my @records;
200             $self->foreach_contained_record_do($strand, $chr, $start, $stop, sub {
201 4     4   27 push @records, $_[0];
202 2         25 });
203              
204 2         11 return @records;
205             }
206              
207             sub total_copy_number {
208 1     1 0 1218 my ($self, $block) = @_;
209              
210 1         4 my $total_copy_number = 0;
211 1     12   21 $self->foreach_record_do( sub {$total_copy_number += $_[0]->copy_number} );
  12         320  
212              
213 1         7 return $total_copy_number;
214             }
215              
216             sub total_copy_number_for_records_contained_in_region {
217 0     0 0 0 my ($self, $strand, $rname, $start, $stop) = @_;
218              
219 0         0 my $total_copy_number = 0;
220             $self->foreach_contained_record_do($strand, $rname, $start, $stop, sub {
221 0     0   0 $total_copy_number += $_[0]->copy_number
222 0         0 });
223              
224 0         0 return $total_copy_number;
225             }
226              
227             #######################################################################
228             ######################### Private methods ##########################
229             #######################################################################
230             sub _build_container {
231             return GenOO::Data::Structure::DoubleHashArray->new(
232 22     22   534 sorting_code_block => sub {return $_[0]->start <=> $_[1]->start}
233 43     43   2384 );
234             }
235              
236             sub _find_longest_record {
237 3     3   7 my ($self) = @_;
238              
239 3         8 my $longest_record;
240 3         7 my $longest_record_length = 0;
241             $self->foreach_record_do(
242             sub {
243 37     37   64 my ($record) = @_;
244              
245 37 100       844 if ($record->length > $longest_record_length) {
246 12         284 $longest_record_length = $record->length;
247 12         44 $longest_record = $record;
248             }
249             }
250 3         34 );
251              
252 3         108 return $longest_record;
253             }
254              
255             sub _records_ref_for_strand_and_rname {
256 4     4   1319 my ($self, $strand, $chr) = @_;
257 4         156 return $self->_container->entries_ref_for_keys($strand, $chr);
258             }
259              
260             sub _reset {
261 1768     1768   3193 my ($self) = @_;
262 1768         67451 $self->_clear_longest_record;
263             }
264              
265              
266             #######################################################################
267             ####################### Deprecated methods #########################
268             #######################################################################
269             sub foreach_overlapping_record_do {
270 0     0 0   my ($self, $strand, $chr, $start, $stop, $block) = @_;
271              
272 0           warn 'Deprecated use of "foreach_overlapping_record_do". '.
273             'Use "foreach_contained_record_do" instead.' . "\n";
274              
275 0           $self->_container->sort_entries;
276              
277             # Get a reference on the array containing the records of the specified strand and rname
278 0 0         my $records_ref = $self->_records_ref_for_strand_and_rname($strand, $chr) or return ();
279              
280             # Find the closest but greater value to a target value. Target value is defined such that
281             # any records with smaller values are impossible to overlap with the requested region
282             # This allows us to search only from that point onwards for overlap
283 0           my $target_value = $start - $self->longest_record->length;
284             my $index = GenOO::Module::Search::Binary->binary_search_for_value_greater_or_equal(
285             $target_value, $records_ref,
286             sub {
287 0     0     return $_[0]->start
288             }
289 0           );
290              
291             # If a value close and greater than the target value exists scan downstream for overlaps
292 0 0         if (defined $index) {
293 0           while ($index < @$records_ref) {
294 0           my $record = $records_ref->[$index];
295 0 0         if ($record->start <= $stop) {
296 0 0         if ($start <= $record->stop) {
297 0 0         last if $block->($record) eq 'break_loop';
298             }
299             }
300             else {
301 0           last; # No chance to find overlap from now on
302             }
303 0           $index++;
304             }
305             }
306             }
307              
308             sub records_overlapping_region {
309 0     0 0   my ($self, $strand, $chr, $start, $stop) = @_;
310              
311 0           warn 'Deprecated use of "records_overlapping_region". '.
312             'Use "records_contained_in_region" instead.' . "\n";
313              
314 0           my @overlapping_records;
315             $self->foreach_overlapping_record_do($strand, $chr, $start, $stop, sub {
316 0     0     push @overlapping_records, $_[0];
317 0           });
318              
319 0           return @overlapping_records;
320             }
321              
322              
323              
324             __PACKAGE__->meta->make_immutable;
325             1;