File Coverage

blib/lib/Tie/PagedArray.pm
Criterion Covered Total %
statement 276 315 87.6
branch 87 118 73.7
condition 15 27 55.5
subroutine 25 28 89.2
pod 1 2 50.0
total 404 490 82.4


line stmt bran cond sub pod time code
1             package Tie::PagedArray;
2             our $VERSION = '0.02';
3 6     6   27608 use 5.008;
  6         21  
  6         295  
4              
5             =pod
6              
7             =head1 NAME
8              
9             Tie::PagedArray - A tieable module for handling large arrays by paging
10              
11             =head1 VERSION
12              
13             Version 0.02
14              
15             =head1 SYNOPSIS
16              
17             tie my(@large_array), 'Tie::PagedArray';
18              
19             tie my(@large_array), 'Tie::PagedArray', page_size => 100, paging_dir => '/tmp';
20              
21             =head1 DESCRIPTION
22              
23             When processing a large volumes of data a program may run out of memory. The operating system may impose a limit on the amount of memory a process can consume or the machine may simply lack the required amount of memory.
24              
25             Tie::PagedArray supports large arrays by implementing paging and avoids running out of memory.
26             The array is broken into pages and these pages are pushed to disk barring the page that is in use. Performance depends on the device chosen for persistence of pages.
27              
28             This module uses L as its backend for serialization and deserialization. So the elements of the paged array can be any value or object. See documentation for L module to work with code refs.
29              
30             When switching pages data from the currently active page is offloaded from the memory onto the page file if the page is marked dirty. This is followed by deserializing the page file of the page to which the switch is to be made.
31              
32             An active page is marked dirty by an B of a value to any element in the page. To forcibly mark a page dirty assign an element in the page to itself!
33              
34             $large_array[2000] = $large_array[2000];
35              
36             The defaults are C 2000>, C ".">
37              
38             =head1 METHODS
39              
40             =cut
41              
42 6     6   34 use strict;
  6         11  
  6         189  
43 6     6   31 use warnings;
  6         15  
  6         190  
44              
45 6     6   6514 use Storable ();
  6         22669  
  6         161  
46 6     6   4978 use Tie::Array;
  6         7454  
  6         653  
47              
48             our @ISA = ('Tie::Array');
49              
50             # Default
51             our $ELEMS_PER_PAGE = 2000;
52              
53             # The pointers to store and retrieve
54             # The user can change this to Storable::nstore for sharing the page files across platforms
55             our $STORE_DELEGATE = \&Storable::store;
56             our $RETRIEVE_DELEGATE = \&Storable::retrieve;
57              
58             # Object properties
59              
60             use constant {
61             # Array properties
62 6         37735 ARRAY_PAGE_BANK => 0,
63             ARRAY_ACTIVE_PAGE_NUM => 1,
64             ARRAY_PAGE_SIZE => 2,
65             ARRAY_LENGTH => 3,
66             ARRAY_PAGING_DIR => 4,
67             ARRAY_PAGE_BEG_IDX=> 5,
68             ARRAY_PAGE_END_IDX=> 6,
69             # Page properties
70             PAGE_DATA => 0,
71             PAGE_LENGTH => 1,
72             PAGE_DIRTY => 2,
73             PAGE_FILE => 3,
74             PAGE_INDEX => 4,
75 6     6   45 };
  6         11  
76              
77             my $PAGE_NUM = 0;
78              
79             =pod
80              
81             =head2 tie
82              
83             The C call lets you create a new B object.
84              
85             tie my(@large_array), 'Tie::PagedArray';
86             tie my(@large_array), 'Tie::PagedArray', page_size => 100;
87             tie my(@large_array), 'Tie::PagedArray', page_size => 100, paging_dir => '/tmp';
88              
89             Ties the array C<@large_array> to C class.
90              
91             C is the size of a page. If C is omitted then it defaults to 2000 elements. The default page size can be changed by setting the package variable C. The change in default only affects future ties.
92              
93             $Tie::PagedArray::ELEMS_PER_PAGE = 2000;
94              
95             C is a directory to store the page files. Choose a directory on a fast storage device. If omitted it defaults to the current working directory.
96              
97             =cut
98              
99             sub TIEARRAY {
100 18     18   3266 my ($class, %params) = @_;
101 18         50 my ($page_size, $paging_dir, $use_nstore) = @params{'page_size', 'paging_dir'};
102 18 50 33     133 $page_size = $page_size && int($page_size) > 0 ? int($page_size) : $ELEMS_PER_PAGE;
103 18 100 66     301 $paging_dir = "." unless $paging_dir && -d $paging_dir;
104              
105             # [PAGE_BANK, ACTIVE_PAGE_NUM, PAGE_SIZE , LENGTH, PAGING_DIR , ARRAY_PAGE_BEG_IDX, ARRAY_PAGE_END_IDX]
106 18         60 my $self = [[] , 0 , $page_size, 0 , $paging_dir, 0 , -1 ];
107 18         97 return bless $self, $class;
108             }
109              
110             sub FETCHSIZE {
111 63503     63503   18274411 return $_[0]->[ARRAY_LENGTH];
112             }
113              
114             sub STORESIZE {
115 2459     2459   12856 local($_);
116 2459         4757 my ($self, $new_size, $page_num, $new_page_size) = @_;
117              
118 2459 100       7566 return $self->CLEAR() if $new_size < 1;
119              
120 1781 100       3839 ($page_num, $new_page_size) = $self->_calc_page_offset($new_size) unless defined($page_num);
121              
122 1781         2668 my $page_bank = $self->[ARRAY_PAGE_BANK];
123              
124             # Add/remove page from the bank
125 1781         2766 my $last_page_idx = $#$page_bank;
126 1781         3142 my $new_pages_count = $page_num - $last_page_idx;
127              
128 1781 100       4544 if ($new_pages_count > 0) {
    100          
129             # Last page should tend towards standard page size
130 1377 100       9578 $page_bank->[-1]->[PAGE_LENGTH] = $self->[ARRAY_PAGE_SIZE] if @$page_bank;
131             # Add new cache to the bank if array is growing
132 1377         2853 for (1..$new_pages_count) {
133 3891         9358 my $page = $self->_new_page();
134 3891         7251 $page->[PAGE_LENGTH] = $self->[ARRAY_PAGE_SIZE];
135 3891         10838 push(@$page_bank, $page);
136             }
137 1377         4082 $page_bank->[$self->[ARRAY_ACTIVE_PAGE_NUM]]->[PAGE_DIRTY] = 1;
138             } elsif ($new_pages_count < 0) {
139 3         16 for (@$page_bank[$last_page_idx + $new_pages_count + 1 .. $last_page_idx]) {
140 3         8 my $page_file = $_->[PAGE_FILE];
141             # Free up extra pages if array is downsizing
142 3 50 33     435 defined($page_file) && -f($page_file) && unlink($page_file);
143             }
144              
145 3         24 $#$page_bank = $last_page_idx + $new_pages_count;
146             }
147              
148             # Allocate/free up space in the page
149 1781         2618 $page_bank->[$page_num]->[PAGE_LENGTH] = $new_page_size;
150              
151 1781         4053 $self->[ARRAY_LENGTH] = $self->_calc_length();
152              
153             # Do nothing if switching to currently active page_file
154 1781 100       7873 $self->_switch_to_page($page_num) if $page_num != $self->[ARRAY_ACTIVE_PAGE_NUM];
155              
156 1781         4106 return $self->[ARRAY_LENGTH];
157             }
158              
159             sub STORE {
160 1762     1762   12597 local($_);
161 1762         3392 my ($self, $index, $value, $page_num, $offset) = @_;
162              
163             # Location in the pages to store the value
164 1762 50       7098 ($page_num, $offset) = $self->_calc_page_offset($index) unless defined($page_num);
165              
166             # Grow/shrink array
167 1762         3137 my $resized = undef;
168              
169 1762 50       4075 $self->STORESIZE($index + 1, $page_num, $offset + 1) if $index >= $self->FETCHSIZE();
170              
171             # Switch to page identified by page_num
172 1762 50       4367 $self->_switch_to_page($page_num) if $page_num != $self->[ARRAY_ACTIVE_PAGE_NUM];
173              
174 1762         7150 my $page = $self->[ARRAY_PAGE_BANK]->[$page_num];
175 1762         2387 $page->[PAGE_DIRTY] = 1;
176              
177 1762         9398 return $page->[PAGE_DATA]->[$offset] = $value;
178             }
179              
180             sub FETCH {
181 59818     59818   331183 local($_);
182 59818         102844 my ($self, $index) = @_;
183              
184             # Location in the pages to store the value
185 59818         121943 my ($page_num, $offset) = $self->_calc_page_offset($index);
186              
187             # Check for out of bounds
188 59818 50       147470 $self->EXISTS($index, $page_num, $offset) or return ();
189              
190 59818         97425 my $page = $self->[ARRAY_PAGE_BANK]->[$page_num];
191              
192             # To make nested paged structures work. Known inefficiency!
193             #$page->[PAGE_DIRTY] = 1;
194             # To make updates to nested structures work just do: $arr[6] = $arr[6]; forcing a STORE operation
195              
196             # Switch to page identified by the page_num
197 59818 100       162186 $self->_switch_to_page($page_num) if $page_num != $self->[ARRAY_ACTIVE_PAGE_NUM];
198              
199 59818         261545 return $page->[PAGE_DATA]->[$offset];
200             }
201              
202             sub EXISTS {
203 59818     59818   71605 local($_);
204 59818         84952 my ($self, $index, $page_num, $offset) = @_;
205              
206 59818 50       132804 ($page_num, $offset) = $self->_calc_page_offset($index) unless defined($page_num);
207 59818 50 33     63361 return undef if $page_num > $#{$self->[ARRAY_PAGE_BANK]} || $offset >= $self->[ARRAY_PAGE_BANK]->[$page_num]->[PAGE_LENGTH];
  59818         331881  
208 59818         192212 return 1;
209             }
210              
211             sub CLEAR {
212 731     731   36403 local($_);
213 731         1556 my ($self) = @_;
214              
215 731         1249 unlink($_->[PAGE_FILE]) foreach @{$self->[ARRAY_PAGE_BANK]};
  731         879161  
216 731         3921 @$self[ARRAY_PAGE_BANK, ARRAY_ACTIVE_PAGE_NUM, ARRAY_PAGE_BEG_IDX, ARRAY_PAGE_END_IDX] = ([], 0, 0, 0);
217 731         9234 return $self->[ARRAY_LENGTH] = 0;
218             }
219              
220             sub DELETE {
221 0     0   0 local($_);
222 0         0 my ($self, $index) = @_;
223              
224 0         0 my $last_index = $self->FETCHSIZE - 1;
225 0 0       0 if ($index > $last_index) {
226 0         0 return undef;
227             } else {
228 0         0 my ($page_num, $offset) = $self->_calc_page_offset($index);
229 0         0 my $value = $self->FETCH($index, $page_num, $offset);
230 0         0 $self->STORE($index, undef, $page_num, $offset);
231 0         0 $self->[ARRAY_PAGE_BANK]->[$page_num]->[PAGE_DIRTY] = 1;
232              
233 0         0 return $value;
234             }
235             }
236              
237             sub PUSH {
238 29     29   1054 local($_);
239 29         55 my $self = shift;
240 29         88 my $i = $self->FETCHSIZE();
241 29         161 $self->STORE($i++, shift) while @_;
242 29         97 return $i;
243             }
244              
245             sub POP {
246 20     20   16239 local($_);
247 20         33 my $self = shift;
248 20         43 my $newsize = $self->FETCHSIZE() - 1;
249 20         34 my $val;
250 20 50       53 if ($newsize >= 0) {
251 20         46 $val = $self->FETCH($newsize);
252 20         61 $self->STORESIZE($newsize);
253             }
254 20         87 return $val;
255             }
256              
257             sub SHIFT {
258 20     20   114 local($_);
259 20         44 my $self = shift;
260 20 50       65 return undef unless $self->[ARRAY_LENGTH] > 0;
261              
262 20 100       82 my $page = $self->[ARRAY_ACTIVE_PAGE_NUM] != 0 ? $self->_switch_to_page(0) : $self->[ARRAY_PAGE_BANK]->[0];
263 20         34 my $val = shift(@{$page->[PAGE_DATA]});
  20         50  
264              
265 20 100       60 if(--$page->[PAGE_LENGTH]) {
266 16         29 $page->[PAGE_DIRTY] = 1;
267 16         33 $self->[ARRAY_PAGE_END_IDX]--;
268 16         50 $self->[ARRAY_LENGTH] = $self->_calc_length();
269             } else {
270             # If page is now empty delete it
271 4 50       620 unlink $page->[PAGE_FILE] if -f $page->[PAGE_FILE];
272 4         9 shift(@{$self->[ARRAY_PAGE_BANK]});
  4         12  
273 4         16 $self->[ARRAY_LENGTH] = $self->_calc_length();
274 4         13 $page = $self->_switch_to_page(0);
275             }
276            
277 20         80 return $val;
278             }
279              
280             sub UNSHIFT {
281 160     160   174326 local($_);
282 160         341 my $self = shift;
283 160 50       608 return $self->[ARRAY_LENGTH] unless @_;
284              
285 160         260 my $page = undef;
286 160 100       723 if($self->[ARRAY_ACTIVE_PAGE_NUM] == 0) {
    50          
287 6         14 $page = $self->[ARRAY_PAGE_BANK]->[0];
288             } elsif ($self->[ARRAY_ACTIVE_PAGE_NUM] > 0) {
289 154         518 $page = $self->_switch_to_page(0);
290             }
291              
292             # Array is empty. Create new page
293 160 100       696 unshift(@{$self->[ARRAY_PAGE_BANK]}, $page = $self->_new_page()) if !defined($page);
  1         5  
294              
295 160         335 my $std_page_size = $self->[ARRAY_PAGE_SIZE];
296 160         377 my $room = $std_page_size - $page->[PAGE_LENGTH];
297 160 100       443 $room = @_ if @_ < $room;
298 160 100       411 $page->[PAGE_LENGTH] = unshift(@{$page->[PAGE_DATA]}, splice(@_, -$room)) if $room > 0;
  113         790  
299 160         409 $page->[PAGE_DIRTY] = 1;
300              
301 160         271 my $remain_len = @_;
302 160         614 while($remain_len) {
303 143         223 $self->[ARRAY_ACTIVE_PAGE_NUM]++;
304 143         237 unshift(@{$self->[ARRAY_PAGE_BANK]}, $page = $self->_new_page());
  143         582  
305 143         322 $page->[PAGE_INDEX] = 0;
306 143         302 $page = $self->_switch_to_page(0);
307 143 100       439 $std_page_size = $remain_len if $std_page_size > $remain_len;
308 143         213 $page->[PAGE_LENGTH] = unshift(@{$page->[PAGE_DATA]}, splice(@_, -$std_page_size));
  143         714  
309 143         249 $page->[PAGE_DIRTY] = 1;
310 143         441 $remain_len = @_;
311             }
312              
313 160         439 @$self[ARRAY_PAGE_BEG_IDX, ARRAY_PAGE_END_IDX] = (0, $page->[PAGE_LENGTH] - 1);
314              
315 160         630 return $self->[ARRAY_LENGTH] = $self->_calc_length();
316             }
317              
318             sub DESTROY {
319 18     18   2972 local($_);
320 18         63 $_[0]->CLEAR;
321             }
322              
323             sub SPLICE {
324 11     11   296 local($_);
325 11         17 my $self = shift;
326 11 50       32 my $index = scalar(@_) ? shift : 0;
327 11         23 my $size = $self->FETCHSIZE();
328 11 50       20 my $len = scalar(@_) ? shift : $size - $index;
329              
330 11         60 tie my(@result), ref($self), page_size => $self->[ARRAY_PAGE_SIZE], paging_dir => $self->[ARRAY_PAGING_DIR];
331              
332 11 50       31 $len += $size - $index if $len < 0;
333 11 50       23 $index = $size if $index > $size;
334 11 50       26 $len -= $index + $len - $size if $index + $len > $size;
335              
336 11         10 my $val;
337 11         17 my $page_bank = $self->[ARRAY_PAGE_BANK];
338 11         14 my $new_elems_len = scalar(@_);
339              
340             ###
341 11         13 my ($page_num, $page_offset);
342 11 100       22 my $copy_len = $new_elems_len <= $len ? $new_elems_len : $len;
343 11         20 my $end_index = $index + $copy_len;
344            
345 11         14 my $j = 0;
346 11         12 my $page;
347 11         29 for(my $i = $index; $i < $end_index; $i++) {
348 3         10 my ($page_num, $offset) = $self->_calc_page_offset($i);
349 3 100       16 $self->_switch_to_page($page_num) if $page_num != $self->[ARRAY_ACTIVE_PAGE_NUM];
350 3         5 $page = $page_bank->[$page_num];
351 3         12 push(@result, $page->[PAGE_DATA]->[$offset]);
352 3         6 $page->[PAGE_DATA]->[$offset] = $_[$j++];
353 3         11 $page->[PAGE_DIRTY] = 1;
354             }
355 11 50       24 return @result if $new_elems_len == $len;
356              
357 11 100       21 if ($new_elems_len < $len) {
358             # Shrink the array
359 3         8 my $del_end_index = $index + $len - 1;
360 3         9 my ($del_start_page_num, $del_start_offset) = $self->_calc_page_offset($end_index);
361 3         8 my ($del_end_page_num, $del_end_offset) = $self->_calc_page_offset($del_end_index);
362 3 100       14 $self->_switch_to_page($del_start_page_num) if $del_start_page_num != $self->[ARRAY_ACTIVE_PAGE_NUM];
363 3         6 my $page = $page_bank->[$del_start_page_num];
364 3 50       10 if ($del_start_page_num == $del_end_page_num) {
365             # Elems to be removed are in the same page
366 3         5 push(@result, splice(@{$page->[PAGE_DATA]}, $del_start_offset, $del_end_offset - $del_start_offset + 1));
  3         14  
367 3         7 @$page[PAGE_LENGTH, PAGE_DIRTY] = (scalar(@{$page->[PAGE_DATA]}), 1);
  3         11  
368 3         11 $self->[ARRAY_PAGE_END_IDX] = $self->[ARRAY_PAGE_BEG_IDX] + $page->[PAGE_LENGTH] - 1;
369             } else {
370             # Axe the elems at the end in the start page
371 0         0 push(@result, splice(@{$page->[PAGE_DATA]}, $del_start_offset, $page->[PAGE_LENGTH] - $del_start_offset));
  0         0  
372 0         0 @$page[PAGE_LENGTH, PAGE_DIRTY] = ($del_start_offset, 1);
373              
374             # Remove pages in the middle
375 0         0 my ($mid_start, $mid_end) = ($del_start_page_num + 1, $del_end_page_num - 1);
376 0 0       0 if ($mid_start <= $mid_end) {
377 0         0 foreach ($mid_start .. $mid_end) {
378 0         0 $self->_switch_to_page($_);
379 0         0 push(@result, @{$page_bank->[$_]->[PAGE_DATA]});
  0         0  
380 0         0 unlink $page_bank->[$_]->[PAGE_FILE];
381             }
382 0         0 splice(@$page_bank, $mid_start, $mid_end - $mid_start + 1);
383             }
384              
385             # Axe the elems in the beginning of the page
386 0         0 $self->_switch_to_page($del_end_page_num);
387 0         0 $page = $page_bank->[$self->[ARRAY_ACTIVE_PAGE_NUM]];
388 0         0 splice(@{$page->[PAGE_DATA]}, 0, $del_end_offset + 1);
  0         0  
389 0 0       0 if ($page->[PAGE_LENGTH] = scalar(@{$page->[PAGE_DATA]})) {
  0         0  
390 0         0 $page->[PAGE_DIRTY] = 1;
391 0         0 $self->[ARRAY_PAGE_BEG_IDX] = $end_index - 1;
392             } else {
393 0         0 unlink $page->[PAGE_FILE];
394 0         0 splice(@$page_bank, $self->[ARRAY_ACTIVE_PAGE_NUM], 1);
395 0         0 $self->[ARRAY_ACTIVE_PAGE_NUM] = 0;
396 0         0 $self->_switch_to_page(0);
397             }
398             }
399             } else {
400             # Expand the array
401 8         19 my ($ins_start_page_num, $ins_start_offset) = $self->_calc_page_offset($end_index);
402 8         12 my $remaining_len = $new_elems_len - $j;
403              
404             # If insertion is needed at the head of the identified page then
405             # either add elems to the previous page or to a new page that is inserted before the identified page
406 8 100 66     22 if ($ins_start_offset == 0 && $ins_start_page_num > 0) {
407 1         1 --$ins_start_page_num;
408 1         4 $page = $self->_switch_to_page($ins_start_page_num);
409 1         2 $ins_start_offset = $page->[PAGE_LENGTH];
410             }
411              
412 8 50       25 $self->_switch_to_page($ins_start_page_num) if $ins_start_page_num != $self->[ARRAY_ACTIVE_PAGE_NUM];
413 8         8 $page = $page_bank->[$ins_start_page_num];
414 8         10 my $page_data = $page->[PAGE_DATA];
415 8         7 my $std_page_size = $self->[ARRAY_PAGE_SIZE];
416              
417 8 100       17 if ($remaining_len + $page->[PAGE_LENGTH] <= $std_page_size) {
418             # All remaining new elems will fit into current page
419 5         15 splice(@$page_data, $ins_start_offset, 0, @_[$j..$#_]);
420 5         6 $page->[PAGE_LENGTH] += $remaining_len;
421 5         8 $self->[ARRAY_PAGE_END_IDX] = $self->[ARRAY_PAGE_BEG_IDX] + $page->[PAGE_LENGTH] - 1;
422             } else {
423             # Split the page
424             # First part of the split
425 3         8 my $second_page = $self->_new_page();
426 3         6 my $tail_first_page = $page->[PAGE_LENGTH] - $ins_start_offset;
427 3         3 my $post_cut_space = $std_page_size - $ins_start_offset;
428 3 100       7 $post_cut_space = $remaining_len if $remaining_len < $post_cut_space;
429 3         4 my @second_page_data = splice(@{$page->[PAGE_DATA]}, $ins_start_offset, $tail_first_page, @_[$j..$j+$post_cut_space-1]);
  3         17  
430 3         6 @$page[PAGE_LENGTH, PAGE_DIRTY] = (scalar(@{$page->[PAGE_DATA]}), 1);
  3         7  
431              
432             #Insert new page into the page bank
433 3         6 $second_page->[PAGE_INDEX] = 0;
434 3         5 splice(@$page_bank, $ins_start_page_num + 1, 0, $second_page);
435              
436             # Second part of the split
437 3         7 $page = $self->_switch_to_page($ins_start_page_num + 1);
438 3         5 $j += $post_cut_space;
439 3         3 $remaining_len = $new_elems_len - $j;
440 3 100       9 if ($remaining_len > 0) {
441 2         4 $post_cut_space = $std_page_size - scalar(@second_page_data);
442 2 50       5 $post_cut_space = $remaining_len if $remaining_len < $post_cut_space;
443 2         7 splice(@second_page_data, 0, 0, @_[$#_-$post_cut_space+1..$#_]);
444 2         2 $new_elems_len -= $post_cut_space;
445 2         3 $remaining_len = $new_elems_len - $j;
446             }
447 3         8 @$page[PAGE_DATA, PAGE_LENGTH, PAGE_DIRTY] = (\@second_page_data, scalar(@second_page_data), 1);
448              
449             # Elems that did not make it to the pages on either side of the split
450 3         9 $self->_switch_to_page($ins_start_page_num);
451 3         12 while ($remaining_len > 0) {
452 4         9 $page = $self->_new_page();
453 4         9 $page->[PAGE_INDEX] = 0;
454 4         6 splice(@$page_bank, $ins_start_page_num + 1, 0, $page);
455 4         9 $self->_switch_to_page($ins_start_page_num + 1);
456 4 100       10 my $elems_count = $std_page_size < $remaining_len ? $std_page_size : $remaining_len;
457 4         15 @$page[PAGE_DATA, PAGE_LENGTH, PAGE_DIRTY] = ([@_[$j..$j+$elems_count-1]], $elems_count, 1);
458 4         6 $j += $elems_count;
459 4         5 $ins_start_page_num++;
460 4         10 $remaining_len = $new_elems_len - $j;
461             }
462             }
463 8         18 $page->[PAGE_DIRTY] = 1;
464             }
465              
466 11         24 $self->[ARRAY_LENGTH] = $self->_calc_length();
467 11         22 $page = $page_bank->[$self->[ARRAY_ACTIVE_PAGE_NUM]];
468 11         29 @$self[ARRAY_PAGE_BEG_IDX, ARRAY_PAGE_END_IDX] = ($page->[PAGE_INDEX], $page->[PAGE_INDEX] + $page->[PAGE_LENGTH] - 1);
469 11         27 return @result;
470              
471             }
472              
473             sub new {
474 0     0 0 0 my $class = shift;
475 0         0 return $class->TIEARRAY(@_);
476             }
477              
478             =pod
479              
480             =head2 page_files
481              
482             The C method available on the I object returns the names of the page files belonging to the array. This can be used to I the array and archive it along with its page files!
483              
484             =cut
485              
486             sub page_files {
487 0     0 1 0 my $self = shift;
488 0         0 return map { $_->[PAGE_FILE] } @{$self->[ARRAY_PAGE_BANK]};
  0         0  
  0         0  
489             }
490              
491             # Private methods
492             sub _calc_page_offset {
493 61616     61616   78377 local($_);
494 61616         78368 my ($self, $index) = @_;
495             # Check if index requested is within active page's index range
496 61616 100 100     405883 return ($self->[ARRAY_ACTIVE_PAGE_NUM], $index - $self->[ARRAY_PAGE_BEG_IDX])
497             if ($index >= $self->[ARRAY_PAGE_BEG_IDX] && $index <= $self->[ARRAY_PAGE_END_IDX]);
498              
499 13895         22049 my $bank = $self->[ARRAY_PAGE_BANK];
500 13895         19030 my $bank_len = @$bank;
501 13895         16772 my ($pn, $page, $page_idx, $page_end_idx);
502 13895         36523 for ($pn = 0; $pn < $bank_len; $pn++) {
503 383144         477223 $page = $bank->[$pn];
504 383144         585006 $page_end_idx = ($page_idx = $page->[PAGE_INDEX]) + $page->[PAGE_LENGTH] - 1;
505 383144 100 66     2010479 return ($pn, $index - $page_idx) if ($index >= $page_idx && $index <= $page_end_idx);
506             }
507              
508 1701         2525 my $std_page_size = $self->[ARRAY_PAGE_SIZE];
509              
510             # Empty array!
511 1701 100       7640 return (int($index / $std_page_size), $index % $std_page_size) if !defined($page);
512              
513             ### If index requested is out of bounds ###
514              
515             # Last page starts out with a standard size
516 1045         1264 $index -= $page_idx;
517 1045 100       3401 return ($pn - 1, $index) if ($index < $std_page_size);
518              
519 660         7897 $index -= $std_page_size;
520 660         3254 return ($pn + int($index / $std_page_size), $index % $std_page_size);
521             }
522              
523             sub _switch_to_page {
524 13644     13644   31720 my ($self, $page_num) = @_;
525 13644         16324 local($_);
526              
527 13644         19396 my $active_page_num = $self->[ARRAY_ACTIVE_PAGE_NUM];
528 13644         18633 my $page_bank = $self->[ARRAY_PAGE_BANK];
529              
530             # Handle empty array
531 13644 100       34432 if ($#$page_bank < 0) {
532 1         5 @$self[ARRAY_ACTIVE_PAGE_NUM, ARRAY_PAGE_BEG_IDX, ARRAY_PAGE_END_IDX] = (-1, 0, -1);
533 1         4 return undef;
534             }
535              
536             # If active page num is not outside the valid range
537 13643 50 33     37730 if ($active_page_num > -1 && $active_page_num <= $#{$self->[ARRAY_PAGE_BANK]}) {
  13643         50133  
538 13643         18796 my $page = $page_bank->[$active_page_num];
539 13643         16039 my $rc = 1;
540 13643 100       33028 if ($page->[PAGE_DIRTY]) {
541             # Write the data to the page file
542 2231         5121 $rc = _store($page->[PAGE_DATA], $page->[PAGE_FILE]);
543 2231 50       53445701 die "Could not write data to page file" unless $rc;
544 2231         5688 $page->[PAGE_DATA] = [];
545 2231         7279 $page->[PAGE_DIRTY] = undef;
546             }
547             }
548              
549             # Switch to page
550 13643         21491 my $page = $page_bank->[$page_num];
551 13643         22576 my $page_file = $page->[PAGE_FILE];
552 13643         21296 my $page_data = [];
553 13643 100 66     346783 $page_data = _retrieve($page_file) if defined($page_file) && -f $page_file;
554              
555 13643         156237749 $page->[PAGE_DATA] = $page_data;
556 13643         78249 $page->[PAGE_DIRTY] = undef;
557 13643         20724 $self->[ARRAY_ACTIVE_PAGE_NUM] = $page_num;
558 13643         41560 @$self[ARRAY_PAGE_BEG_IDX, ARRAY_PAGE_END_IDX] =
559             ($page->[PAGE_INDEX], $page->[PAGE_INDEX] + $page->[PAGE_LENGTH] - 1);
560              
561 13643 50       53305 return($page_data ? $page : undef);
562             }
563              
564             sub _new_page {
565 4042     4042   4872 my ($self) = @_;
566              
567             # [PAGE_DATA, PAGE_LENGTH, PAGE_DIRTY, PAGE_FILE, PAGE_INDEX]
568 4042         29531 return [[] , 0 , 1 , sprintf("%s/arr_%i_%i_%i.pg", $self->[ARRAY_PAGING_DIR], $self, $$, $PAGE_NUM++)];
569             }
570              
571             sub _calc_length {
572 1972     1972   11527 my ($self) = @_;
573              
574             # Setup array length and first index in the array for each page
575 1972         2567 my $len = 0;
576 1972         2271 foreach (@{$self->[ARRAY_PAGE_BANK]}) {
  1972         5535  
577 15175         22220 $_->[PAGE_INDEX] = $len;
578 15175         25857 $len += $_->[PAGE_LENGTH];
579             }
580              
581 1972         7127 return $len;
582             }
583              
584             sub _store {
585 2231     2231   3701 my ($data, $page_file) = @_;
586 2231         7760 $STORE_DELEGATE->($data, $page_file);
587             }
588              
589             sub _retrieve {
590 10319     10319   15286 my ($page_file) = @_;
591 10319         36061 $RETRIEVE_DELEGATE->($page_file);
592             }
593              
594             1;
595              
596             =head1 LIMITATIONS
597              
598             1) C loop must not be used on Cs because the array in foreach expands into an in-memory list. Instead, use iterative loops.
599              
600             for(my $i = 0; $i < scalar(@large_array); $i++) {
601             # Do something with $large_array[$i]
602             }
603              
604             OR
605              
606             # In versions 5.012 and later
607             while(my($i, $val) = each(@large_array)) {
608             # Do something with $val
609             }
610              
611              
612             2) When an update is made to an element's I datastructure then the corresponding page is not marked dirty as it is difficult to track such updates.
613              
614             Suppose C 1> and hash refs are stored as elements in the array.
615              
616             @car_parts = ({name => "wheel", count => 4}, {name => "lamp", count => 8});
617              
618             Then an update to I will B the page dirty. When the page is later switched out the modification would be lost!
619              
620             $car_parts[1]->{count} = 6;
621              
622             The workaround is to assign the element to itself.
623              
624             $car_parts[1] = $car_parts[1];
625              
626              
627             3) When an object is assigned to two elements in I pages they point to two independent objects.
628              
629             Suppose C 2>, then
630              
631             my $wheel = {name => "wheel", count => 4};
632              
633             @car_parts = ($wheel, $wheel, $wheel);
634              
635             print($car_parts[0] == $car_parts[1] ? "Same object\n" : "Independent objects\n");
636             Same object
637              
638             print($car_parts[0] == $car_parts[1] ? "Same object\n" : "Independent objects\n");
639             Independent objects
640              
641             =pod
642              
643             =head1 BUGS
644              
645             None known.
646              
647             =head1 SUPPORT
648              
649             You can find documentation for this module with the perldoc command.
650              
651             perldoc Tie::PagedArray
652              
653             =head1 AUTHOR
654              
655             Kartik Bherin
656              
657             =head1 LICENSE AND COPYRIGHT
658              
659             Copyright (C) 2013 Kartik Bherin.
660              
661             =cut