File Coverage

blib/lib/Data/CompactReadonly/V0/Node.pm
Criterion Covered Total %
statement 124 125 99.2
branch 76 86 88.3
condition 14 21 66.6
subroutine 34 34 100.0
pod n/a
total 248 266 93.2


line stmt bran cond sub pod time code
1             package Data::CompactReadonly::V0::Node;
2             our $VERSION = '0.1.0';
3              
4 18     18   6679 use warnings;
  18         104  
  18         817  
5 18     18   8687 use strict;
  18         57  
  18         617  
6              
7 15     15   4562 use Fcntl qw(:seek);
  15         39  
  15         1879  
8 15     15   8380 use Scalar::Type qw(is_* bool_supported);
  15         50075  
  15         211  
9              
10 15     15   18700 use Devel::StackTrace;
  15         43099  
  15         589  
11 15     15   12387 use Data::CompactReadonly::V0::Text;
  15         113  
  15         733  
12 14     14   8230 use Data::Dumper;
  14         70566  
  14         8983  
13              
14             # return the root node. assumes the $fh is pointing at the start of the node header
15             sub _init {
16 111     111   620 my($class, %args) = @_;
17 111         321 my $self = bless(\%args, $class);
18 111         459 $self->{root} = $self;
19 111         399 return $self->_node_at_current_offset();
20             }
21              
22             # write the root node to the file and, recursively, its children
23             sub _create {
24 187     187   947 my($class, %args) = @_;
25 187 50       631 die("fell through to Data::CompactReadonly::V0::Node::_create when creating a $class\n")
26             if($class ne __PACKAGE__);
27              
28             $class->_type_class(
29             from_data => $args{data}
30 187         680 )->_create(%args);
31             }
32              
33             # stash (in memory) of everything that we've seen while writing the database,
34             # with a pointer to their location in the file so that it can be re-used. We
35             # even stash stringified Dicts/Arrays, which can eat a TON of memory. Yes, we
36             # seem to need to local()ise the config vars in each sub.
37             sub _stash_already_seen {
38 65806     65806   229383 my($class, %args) = @_;
39 65806         137053 local $Data::Dumper::Indent = 0;
40 65806         137728 local $Data::Dumper::Sortkeys = 1;
41 65806 100 66     246097 if(bool_supported && is_bool($args{data})) {
    100          
42             $args{globals}->{already_seen}->{
43             $args{data} ? 'bt' : 'bf'
44 14 100       517 } = tell($args{fh});
45             } elsif(defined($args{data})) {
46             $args{globals}->{already_seen}->{d}->{
47             ref($args{data}) ? Dumper($args{data}) : $args{data}
48 65785 100       2557952 } = tell($args{fh});
49             } else {
50 7         280 $args{globals}->{already_seen}->{u} = tell($args{fh});
51             }
52             }
53              
54             # look in the stash for data that we've seen before and get a pointer to it
55             sub _get_already_seen {
56 131339     131339   488361 my($class, %args) = @_;
57 131339         253626 local $Data::Dumper::Indent = 0;
58 131339         213265 local $Data::Dumper::Sortkeys = 1;
59              
60 131339 100 66     378793 if(bool_supported && is_bool($args{data})) {
    100          
61             return
62             $args{data} ? $args{globals}->{already_seen}->{bt}
63             : $args{globals}->{already_seen}->{bf}
64 18 100       583 } elsif(defined($args{data})) {
65             return
66             $args{globals}->{already_seen}->{d}->{
67             ref($args{data}) ? Dumper($args{data}) : $args{data}
68             }
69 131315 100       4280265 } else {
70 6         163 return $args{globals}->{already_seen}->{u};
71             }
72             }
73              
74             sub _get_next_free_ptr {
75 131502     131502   473293 my($class, %args) = @_;
76 131502         613883 return $args{globals}->{next_free_ptr};
77             }
78              
79             sub _set_next_free_ptr {
80 65806     65806   330707 my($class, %args) = @_;
81 65806         704022 $args{globals}->{next_free_ptr} = tell($args{fh});
82             }
83              
84             # in case the database isn't at the beginning of a file, eg in __DATA__
85             sub _db_base {
86 2717     2717   4802 my $self = shift;
87 2717         5866 return $self->_root()->{db_base};
88             }
89              
90             sub _fast_collections {
91 53     53   99 my $self = shift;
92 53         128 return $self->_root()->{'fast_collections'};
93             }
94              
95             sub _tied {
96 104     104   186 my $self = shift;
97 104         232 return $self->_root()->{'tie'};
98             }
99              
100             # figure out what type the node is from the node specifier byte, then call
101             # the class's _init to get it to read itself from the db
102             sub _node_at_current_offset {
103 865     865   1592 my $self = shift;
104              
105             # for performance, cache the filehandle in this object
106 865   66     2549 $self->{_fh} ||= $self->_fh();
107 865         3015 my $type_class = $self->_type_class(from_byte => $self->_bytes_at_current_offset(1));
108 851         2696 return $type_class->_init(root => $self->_root(), offset => tell($self->{_fh}) - $self->_db_base());
109             }
110              
111             # what's the minimum number of bytes required to store this int?
112             sub _bytes_required_for_int {
113 14     14   4197 no warnings 'portable'; # perl worries about 32 bit machines. I don't.
  14         83  
  14         18278  
114 65776     65776   179525 my($class, $int) = @_;
115             return
116 65776 50       173062 $int <= 0xff ? 1 : # Byte
    100          
    100          
    100          
    100          
117             $int <= 0xffff ? 2 : # Short
118             $int <= 0xffffff ? 3 : # Medium
119             $int <= 0xffffffff ? 4 : # Long
120             $int <= 0xffffffffffffffff ? 8 : # Huge
121             9; # 9 or greater signals too big for 64 bits
122             }
123              
124             # given the number of elements in a Collection, figure out what the appropriate
125             # class is to represent it. NB that only Byte/Short/Medium/Long are allowed, we
126             # don't allow Huge numbers of elements in a Collection
127             sub _sub_type_for_collection_of_length {
128 65716     65716   2746382 my($class, $length) = @_;
129 65716         206633 my $bytes = $class->_bytes_required_for_int($length);
130 65716 50       390615 return $bytes == 1 ? 'Byte' :
    100          
    100          
    100          
131             $bytes == 2 ? 'Short' :
132             $bytes == 3 ? 'Medium' :
133             $bytes == 4 ? 'Long' :
134             undef;
135             }
136              
137             # given a blob of text, figure out its type
138             sub _text_type_for_data {
139 65676     65676   148865 my($class, $data) = @_;
140 65676         102108 return 'Text::'.do {
141 65676 50       271492 $class->_sub_type_for_collection_of_length(
142             length(Data::CompactReadonly::V0::Text->_text_to_bytes($data))
143             ) || die("$class: Invalid: Text too long");
144             };
145             }
146              
147             # work out what node type is required to represent a piece of data
148             sub _type_map_from_data {
149 187     187   477 my($class, $data) = @_;
150             return !defined($data)
151             ? 'Scalar::Null' :
152             (bool_supported && is_bool($data))
153             ? 'Scalar::'.($data ? 'True' : 'False') :
154             ref($data) eq 'ARRAY'
155 21 50       628 ? 'Array::'.do { $class->_sub_type_for_collection_of_length(1 + $#{$data}) ||
  21         84  
156             die("$class: Invalid: Array too long");
157             } :
158             ref($data) eq 'HASH'
159 19 50       597 ? 'Dictionary::'.do { $class->_sub_type_for_collection_of_length(scalar(keys %{$data})) ||
  19         141  
160             die("$class: Invalid: Dictionary too long");
161             } :
162             is_integer($data)
163 187 100 66     819 ? do {
    50          
    100          
    100          
    100          
    100          
    100          
    100          
164 54 100       2719 my $neg = $data < 0 ? 'Negative' : '';
165 54         197 my $bytes = $class->_bytes_required_for_int(abs($data));
166 54 50       329 $bytes == 1 ? "Scalar::${neg}Byte" :
    100          
    100          
    100          
    100          
167             $bytes == 2 ? "Scalar::${neg}Short" :
168             $bytes == 3 ? "Scalar::${neg}Medium" :
169             $bytes == 4 ? "Scalar::${neg}Long" :
170             $bytes < 9 ? "Scalar::${neg}Huge"
171             : "Scalar::Float64"
172             } :
173             is_number($data)
174             ? 'Scalar::Float64' :
175             !ref($data)
176             ? $class->_text_type_for_data($data)
177             : die("Can't yet create from '$data'\n");
178             }
179              
180             my $type_by_bits = {
181             0b00 => 'Text',
182             0b01 => 'Array',
183             0b10 => 'Dictionary',
184             0b11 => 'Scalar'
185             };
186             my $subtype_by_bits = {
187             0b0000 => 'Byte', 0b0001 => 'NegativeByte',
188             0b0010 => 'Short', 0b0011 => 'NegativeShort',
189             0b0100 => 'Medium', 0b0101 => 'NegativeMedium',
190             0b0110 => 'Long', 0b0111 => 'NegativeLong',
191             0b1000 => 'Huge', 0b1001 => 'NegativeHuge',
192             0b1010 => 'Null',
193             0b1011 => 'Float64',
194             0b1100 => 'True',
195             0b1101 => 'False',
196             (map { $_ => 'Reserved' } (0b1110 .. 0b1111))
197             };
198             my $bits_by_type = { reverse %{$type_by_bits} };
199             my $bits_by_subtype = { reverse %{$subtype_by_bits} };
200              
201             # used by classes when serialising themselves to figure out what their
202             # type specifier byte should be
203             sub _type_byte_from_class {
204 65806     65806   123816 my $class = shift;
205 65806         442594 $class =~ /.*::([^:]+)::([^:]+)/;
206 65806         265777 my($type, $subtype) = ($1, $2);
207             return chr(
208             ($bits_by_type->{$type} << 6) +
209 65806         479949 ($bits_by_subtype->{$subtype} << 2)
210             );
211             }
212              
213             # work out what node type is represented by a given node specifier byte
214             sub _type_map_from_byte {
215 864     864   1540 my $class = shift;
216 864         1617 my $in_type = ord(shift());
217              
218 864         2908 my $type = $type_by_bits->{$in_type >> 6};
219 864         15476 my $scalar_type = $subtype_by_bits->{($in_type & 0b111100) >> 2};
220              
221 864 100       5687 die(sprintf("$class: Invalid type: 0b%08b: Reserved\n", $in_type))
222             if($scalar_type eq 'Reserved');
223 860 100 100     5088 die(sprintf("$class: Invalid type: 0b%08b: length $scalar_type\n", $in_type))
224             if($type ne 'Scalar' && $scalar_type =~ /^(Null|Float64|Negative|Huge|True|False)/);
225 851         3925 return join('::', $type, $scalar_type);
226             }
227              
228             # get a class name (having loaded the relevant class) either from_data
229             # (when writing a file) or from_byte (when reading a file)
230             sub _type_class {
231 1051     1051   2928 my($class, $from, $in_type) = @_;
232 1051         2574 my $map_method = "_type_map_$from";
233 1051         4568 my $type_name = "Data::CompactReadonly::V0::".$class->$map_method($in_type);
234 1038 100       16291 unless($type_name->VERSION()) {
235 13     13   11947 eval "use $type_name";
  13     11   63  
  13         390  
  11         9336  
  11         71  
  11         320  
  68         9438  
236 68 50       405 die($@) if($@);
237             }
238 1038         5669 return $type_name;
239             }
240              
241             # read N bytes from the current offset
242             sub _bytes_at_current_offset {
243 3306     3306   7820 my($self, $bytes) = @_;
244             # for performance, cache the filehandle in this object
245 3306   33     8305 $self->{_fh} ||= $self->_fh();
246 3306         6814 my $tell = tell($self->{_fh});
247 3306         32251 my $chars_read = read($self->{_fh}, my $data, $bytes);
248              
249 3306 50       12942 if(!defined($chars_read)) {
    100          
250 0         0 die(
251             "$self: read() failed to read $bytes bytes at offset $tell: $!\n".
252             Devel::StackTrace->new()->as_string()
253             );
254             } elsif($chars_read != $bytes) {
255 1         16 die(
256             "$self: read() tried to read $bytes bytes at offset $tell, got $chars_read: $!\n".
257             Devel::StackTrace->new()->as_string()
258             );
259             }
260 3306         13920 return $data;
261             }
262              
263             # this is a monstrous evil - TODO instantiate classes when writing!
264             # seek to a particular point in the *database* (not in the file). If the
265             # pointer has gone too far for the current pointer size, die. This will be
266             # caught in Data::CompactReadonly::V0->create(), the pointer size incremented, and it will
267             # try again from the start
268             sub _seek {
269 198954     198954   396403 my $self = shift;
270 198954 100       525842 if($#_ == 0) { # for when reading
271 1864         3181 my $to = shift;
272             # for performance, cache the filehandle in this object
273 1864   66     5151 $self->{_fh} ||= $self->_fh();
274 1864         22623 seek($self->{_fh}, $self->_db_base() + $to, SEEK_SET);
275             } else { # for when writing
276 197090         688660 my %args = @_;
277             die($self->_ptr_blown())
278 197090 100       651237 if($args{pointer} >= 256 ** $args{ptr_size});
279 197086         1422990 seek($args{fh}, $args{pointer}, SEEK_SET);
280             }
281             }
282              
283 8     8   10768 sub _ptr_blown { "pointer out of range" }
284              
285             # the offset of the current node
286             sub _offset {
287 1116     1116   2059 my $self = shift;
288 1116         4305 return $self->{offset};
289             }
290              
291             sub _root {
292 5898     5898   9908 my $self = shift;
293 5898         96461 return $self->{root};
294             }
295              
296             # the filehandle, currently only used when reading, see the TODO above
297             # for _seek
298             sub _fh {
299 191     191   421 my $self = shift;
300 191         548 return $self->_root()->{fh};
301             }
302              
303             sub _ptr_size {
304 1626     1626   3408 my $self = shift;
305 1626         3582 return $self->_root()->{ptr_size};
306             }
307              
308             1;