File Coverage

blib/lib/Bio/JBrowse/Store/NCList/LazyNCList.pm
Criterion Covered Total %
statement 113 121 93.3
branch 22 32 68.7
condition 7 12 58.3
subroutine 17 19 89.4
pod 4 13 30.7
total 163 197 82.7


line stmt bran cond sub pod time code
1             package Bio::JBrowse::Store::NCList::LazyNCList;
2             BEGIN {
3 2     2   785 $Bio::JBrowse::Store::NCList::LazyNCList::AUTHORITY = 'cpan:RBUELS';
4             }
5             {
6             $Bio::JBrowse::Store::NCList::LazyNCList::VERSION = '0.1';
7             }
8              
9 2     2   11 use strict;
  2         4  
  2         57  
10 2     2   9 use warnings;
  2         3  
  2         49  
11 2     2   10 use Carp;
  2         3  
  2         123  
12 2     2   11 use List::Util qw(max);
  2         3  
  2         210  
13              
14 2     2   1667 use Bio::JBrowse::Store::NCList::NCList;
  2         4  
  2         3016  
15              
16              
17             sub new {
18 1     1 1 19 my ($class, $attrs, $lazyClass, $makeLazy, $loadChunk,
19             $measure, $output, $sizeThresh) = @_;
20              
21 1         5 my $self = { attrs => $attrs,
22             start => $attrs->makeGetter("start"),
23             end => $attrs->makeGetter("end"),
24             setSublist => $attrs->makeSetter("sublist"),
25             lazyClass => $lazyClass,
26             makeLazy => $makeLazy,
27             loadChunk => $loadChunk,
28             measure => $measure,
29             output => $output,
30             sizeThresh => $sizeThresh,
31             count => 0,
32             minStart => undef,
33             maxEnd => undef,
34             chunkNum => 1,
35             chunkSizes => [],
36             partialStack => [] };
37 1         3 bless $self, $class;
38              
39 1         3 $self->addNewLevel();
40              
41 1         3 return $self;
42             }
43              
44             sub importExisting {
45 0     0 0 0 my ($class, $attrs, $lazyClass, $count, $minStart,
46             $maxEnd, $loadChunk, $topLevelList) = @_;
47              
48 0         0 my $self = { attrs => $attrs,
49             lazyClass => $lazyClass,
50             start => $attrs->makeGetter("start"),
51             end => $attrs->makeGetter("end"),
52             count => $count,
53             minStart => $minStart,
54             maxEnd => $maxEnd,
55             loadChunk => $loadChunk,
56             topLevelList => $topLevelList };
57 0         0 bless $self, $class;
58              
59 0         0 $self->addNewLevel();
60              
61 0         0 return $self;
62             }
63              
64              
65             sub addSorted {
66 2559     2559 1 33889 my ($self, $feat) = @_;
67              
68 2559         3657 $self->{count} += 1;
69 2559         3463 my $lastAdded = $self->{lastAdded};
70 2559 50       6512 my $start = $self->{start}->( $feat ) or die;
71 2559         6890 my $end = $self->{end}->( $feat );
72              
73 2559 100       4440 if (defined($lastAdded)) {
74 2558         6531 my $lastStart = $self->{start}->($lastAdded);
75 2558         6473 my $lastEnd = $self->{end}->($lastAdded);
76             # check that the input is sorted
77 2558 50       5271 $lastStart <= $start
78             or die "input not sorted: got start $lastStart before $start";
79              
80 2558 50 33     6296 die "input not sorted: got $lastStart..$lastEnd before $start..$end"
81             if $lastStart == $start && $lastEnd < $end;
82             } else {
83             # LazyNCList requires sorted input, so the start of the first feat
84             # is the minStart
85 1         3 $self->{minStart} = $start;
86             }
87              
88 2559         4030 $self->{lastAdded} = $feat;
89              
90 2559         3486 my $chunkSizes = $self->{chunkSizes};
91 2559         3254 my $partialStack = $self->{partialStack};
92              
93 2559         6662 for (my $level = 0; $level <= $#$partialStack; $level++) {
94             # due to NCList nesting, among other things, it's hard to be exactly
95             # precise about the size of the JSON serialization, but this will get
96             # us pretty close.
97 2580         5697 my $featSize = $self->{measure}->($feat);
98 2580         21260 my $proposedChunkSize = $chunkSizes->[$level] + $featSize;
99             #print STDERR "chunksize at $level is now " . $chunkSizes->[$level] . "; (next chunk is " . $self->{chunkNum} . ")\n";
100              
101             # If this partial chunk is full,
102 2580 100 66     7460 if ( $proposedChunkSize > $self->{sizeThresh} && @{$partialStack->[$level]} ){
  21         73  
103             # then we're finished with the current "partial" chunk (i.e.,
104             # it's now a "complete" chunk rather than a partial one), so
105             # create a new NCList to hold all the features in this chunk.
106 21         70 my $lazyFeat = $self->finishChunk( $partialStack->[$level] );
107              
108             # start a new partial chunk with the current feature
109 21         194 $partialStack->[$level] = [$feat];
110 21         34 $chunkSizes->[$level] = $featSize;
111              
112             # and propagate $lazyFeat up to the next level
113 21         25 $feat = $lazyFeat;
114              
115             # if we're already at the highest level,
116 21 100       64 if ($level == $#{$self->{partialStack}}) {
  21         220  
117             # then we need to make a new level to have somewhere to put
118             # the new lazy feat
119 1         3 $self->addNewLevel();
120             }
121             } else {
122             # add the current feature the partial chunk at this level
123 2559         2857 push @{$partialStack->[$level]}, $feat;
  2559         4677  
124 2559         3551 $chunkSizes->[$level] = $proposedChunkSize;
125 2559         6908 last;
126             }
127             }
128             }
129              
130             sub addNewLevel {
131 2     2 0 2 my ($self) = @_;
132 2         4 push @{$self->{partialStack}}, [];
  2         8  
133 2         3 push @{$self->{chunkSizes}}, 0;
  2         13  
134             }
135              
136             sub finishChunk {
137 22     22 0 41 my ($self, $featList) = @_;
138 22         145 my $newNcl = Bio::JBrowse::Store::NCList::NCList->new(
139             $self->{start},
140             $self->{end},
141             $self->{setSublist},
142             $featList
143             );
144 22         44 my $chunkId = $self->{chunkNum};
145 22         43 $self->{chunkNum} += 1;
146 22         70 $self->{output}->($newNcl->nestedList, $chunkId);
147              
148 22 100       168 $self->{maxEnd} = $newNcl->maxEnd unless defined($self->{maxEnd});
149 22         82 $self->{maxEnd} = max($self->{maxEnd}, $newNcl->maxEnd);
150              
151             # return the lazy ("fake") feature representing this chunk
152 22         69 return $self->{makeLazy}->($newNcl->minStart, $newNcl->maxEnd, $chunkId);
153             }
154              
155              
156             sub finish {
157 1     1 1 9 my ($self) = @_;
158 1         3 my $level;
159              
160 1         2 for ($level = 0; $level < $#{$self->{partialStack}}; $level++) {
  2         9  
161 1         5 my $lazyFeat = $self->finishChunk($self->{partialStack}->[$level]);
162              
163             # pass $lazyFeat up to the next higher level.
164             # (the loop ends one level before the highest level, so there
165             # will always be at least one higher level)
166 1         9 push @{$self->{partialStack}->[$level + 1]}, $lazyFeat;
  1         5  
167             }
168              
169             # make sure there's a top-level NCL
170 1         2 $level = $#{$self->{partialStack}};
  1         3  
171 1         7 my $newNcl = Bio::JBrowse::Store::NCList::NCList->new(
172             $self->{start},
173             $self->{end},
174             $self->{setSublist},
175             $self->{partialStack}->[$level]);
176 1         6 $self->{maxEnd} = max( grep defined, $self->{maxEnd}, $newNcl->maxEnd );
177             #print STDERR "top level NCL has " . scalar(@{$self->{partialStack}->[$level]}) . " features\n";
178 1         5 $self->{topLevelList} = $newNcl->nestedList;
179             }
180              
181             sub binarySearch {
182 36     36 0 95 my ($self, $arr, $item, $getter) = @_;
183              
184 36         46 my $low = -1;
185 36         41 my $high = $#{$arr} + 1;
  36         58  
186 36         52 my $mid;
187              
188 36         85 while ($high - $low > 1) {
189 151         240 $mid = int(($low + $high) / 2);
190 151 50       406 if ($getter->($arr->[$mid]) > $item) {
191 151         379 $high = $mid;
192             } else {
193 0         0 $low = $mid;
194             }
195             }
196              
197             # if we're iterating rightward, return the high index;
198             # if leftward, the low index
199 36 50       80 if ($getter == $self->{end}) { return $high } else { return $low };
  36         83  
  0         0  
200             };
201              
202             sub iterHelper {
203 36     36 0 78 my ($self, $arr, $from, $to, $fun, $inc,
204             $searchGet, $testGet, $path) = @_;
205 36         48 my $len = $#{$arr} + 1;
  36         69  
206 36         102 my $i = $self->binarySearch($arr, $from, $searchGet);
207 36         125 my $getChunk = $self->{attrs}->makeGetter("chunk");
208 36         112 my $getSublist = $self->{attrs}->makeGetter("sublist");
209              
210 36   66     213 while (($i < $len)
      66        
211             && ($i >= 0)
212             && (($inc * $testGet->($arr->[$i])) < ($inc * $to)) ) {
213              
214 2581 100       6323 if ($arr->[$i][0] == $self->{lazyClass}) {
215 22         61 my $chunkNum = $getChunk->($arr->[$i]);
216 22         110 my $chunk = $self->{loadChunk}->($chunkNum);
217 22         157 $self->iterHelper($chunk, $from, $to, $fun, $inc,
218             $searchGet, $testGet, [$chunkNum]);
219             } else {
220 2559         8533 $fun->($arr->[$i], [@$path, $i]);
221             }
222              
223 2581         11851 my $sublist = $getSublist->($arr->[$i]);
224 2581 100       5433 if (defined($sublist)) {
225 13         61 $self->iterHelper($sublist, $from, $to, $fun, $inc,
226             $searchGet, $testGet, [@$path, $i]);
227             }
228 2581         15375 $i += $inc;
229             }
230             }
231              
232              
233             sub overlapCallback {
234 1     1 1 4 my ($self, $from, $to, $fun) = @_;
235              
236 1 50       7 croak "LazyNCList not loaded" unless defined($self->{topLevelList});
237              
238 1 50       6 return unless $self->count;
239              
240             # inc: iterate leftward or rightward
241 1 50       6 my $inc = ($from > $to) ? -1 : 1;
242             # searchGet: search on start or end
243 1 50       5 my $searchGet = ($from > $to) ? $self->{start} : $self->{end};
244             # testGet: test on start or end
245 1 50       6 my $testGet = ($from > $to) ? $self->{end} : $self->{start};
246             # treats the root chunk as number 0
247 1         8 $self->iterHelper($self->{topLevelList}, $from, $to, $fun,
248             $inc, $searchGet, $testGet, [0]);
249             }
250              
251 1     1 0 9 sub count { return shift->{count}; }
252              
253 1     1 0 10 sub maxEnd { return shift->{maxEnd}; }
254              
255 1     1 0 1713402 sub minStart { return shift->{minStart}; }
256              
257 0     0 0   sub topLevelList { return shift->{topLevelList}; }
258              
259             1;
260              
261             __END__