File Coverage

blib/lib/Data/CompactReadonly/V0/Node.pm
Criterion Covered Total %
statement 122 125 97.6
branch 67 86 77.9
condition 11 21 52.3
subroutine 34 34 100.0
pod n/a
total 234 266 87.9


line stmt bran cond sub pod time code
1             package Data::CompactReadonly::V0::Node;
2             our $VERSION = '0.1.0';
3              
4 18     18   4978 use warnings;
  18         49  
  18         476  
5 17     17   4374 use strict;
  17         77  
  17         360  
6              
7 15     15   3359 use Fcntl qw(:seek);
  15         40  
  15         1205  
8 15     15   6231 use Scalar::Type qw(is_* bool_supported);
  15         12086  
  15         177  
9              
10 14     14   8871 use Devel::StackTrace;
  14         31518  
  14         395  
11 14     14   7546 use Data::CompactReadonly::V0::Text;
  14         155  
  14         536  
12 13     13   6336 use Data::Dumper;
  13         39708  
  13         6873  
13              
14             # return the root node. assumes the $fh is pointing at the start of the node header
15             sub _init {
16 111     111   474 my($class, %args) = @_;
17 111         308 my $self = bless(\%args, $class);
18 111         401 $self->{root} = $self;
19 111         323 return $self->_node_at_current_offset();
20             }
21              
22             # write the root node to the file and, recursively, its children
23             sub _create {
24 181     181   838 my($class, %args) = @_;
25 181 50       595 die("fell through to Data::CompactReadonly::V0::Node::_create when creating a $class\n")
26             if($class ne __PACKAGE__);
27              
28             $class->_type_class(
29             from_data => $args{data}
30 181         622 )->_create(%args);
31             }
32              
33             # stash (in memory) of everything that we've seen while writing the database,
34             # with a pointer to their location in the file so that it can be re-used. We
35             # even stash stringified Dicts/Arrays, which can eat a TON of memory. Yes, we
36             # seem to need to local()ise the config vars in each sub.
37             sub _stash_already_seen {
38 65800     65800   186759 my($class, %args) = @_;
39 65800         130622 local $Data::Dumper::Indent = 0;
40 65800         110572 local $Data::Dumper::Sortkeys = 1;
41 65800 50 33     173591 if(bool_supported && is_bool($args{data})) {
    100          
42             $args{globals}->{already_seen}->{
43             $args{data} ? 'bt' : 'bf'
44 0 0       0 } = tell($args{fh});
45             } elsif(defined($args{data})) {
46             $args{globals}->{already_seen}->{d}->{
47             ref($args{data}) ? Dumper($args{data}) : $args{data}
48 65793 100       1157795 } = tell($args{fh});
49             } else {
50 7         121 $args{globals}->{already_seen}->{u} = tell($args{fh});
51             }
52             }
53              
54             # look in the stash for data that we've seen before and get a pointer to it
55             sub _get_already_seen {
56 131339     131339   471565 my($class, %args) = @_;
57 131339         269114 local $Data::Dumper::Indent = 0;
58 131339         224844 local $Data::Dumper::Sortkeys = 1;
59              
60 131339 50 33     358343 if(bool_supported && is_bool($args{data})) {
    100          
61             return
62             $args{data} ? $args{globals}->{already_seen}->{bt}
63             : $args{globals}->{already_seen}->{bf}
64 0 0       0 } elsif(defined($args{data})) {
65             return
66             $args{globals}->{already_seen}->{d}->{
67             ref($args{data}) ? Dumper($args{data}) : $args{data}
68             }
69 131333 100       1252519 } else {
70 6         63 return $args{globals}->{already_seen}->{u};
71             }
72             }
73              
74             sub _get_next_free_ptr {
75 131490     131490   436185 my($class, %args) = @_;
76 131490         533727 return $args{globals}->{next_free_ptr};
77             }
78              
79             sub _set_next_free_ptr {
80 65800     65800   278625 my($class, %args) = @_;
81 65800         772201 $args{globals}->{next_free_ptr} = tell($args{fh});
82             }
83              
84             # in case the database isn't at the beginning of a file, eg in __DATA__
85             sub _db_base {
86 2580     2580   4039 my $self = shift;
87 2580         4661 return $self->_root()->{db_base};
88             }
89              
90             sub _fast_collections {
91 53     53   108 my $self = shift;
92 53         119 return $self->_root()->{'fast_collections'};
93             }
94              
95             sub _tied {
96 104     104   194 my $self = shift;
97 104         201 return $self->_root()->{'tie'};
98             }
99              
100             # figure out what type the node is from the node specifier byte, then call
101             # the class's _init to get it to read itself from the db
102             sub _node_at_current_offset {
103 823     823   1430 my $self = shift;
104              
105             # for performance, cache the filehandle in this object
106 823   66     2005 $self->{_fh} ||= $self->_fh();
107 823         1910 my $type_class = $self->_type_class(from_byte => $self->_bytes_at_current_offset(1));
108 809         2221 return $type_class->_init(root => $self->_root(), offset => tell($self->{_fh}) - $self->_db_base());
109             }
110              
111             # what's the minimum number of bytes required to store this int?
112             sub _bytes_required_for_int {
113 13     13   2654 no warnings 'portable'; # perl worries about 32 bit machines. I don't.
  13         44  
  13         13248  
114 65784     65784   250687 my($class, $int) = @_;
115             return
116 65784 50       150955 $int <= 0xff ? 1 : # Byte
    100          
    100          
    100          
    100          
117             $int <= 0xffff ? 2 : # Short
118             $int <= 0xffffff ? 3 : # Medium
119             $int <= 0xffffffff ? 4 : # Long
120             $int <= 0xffffffffffffffff ? 8 : # Huge
121             9; # 9 or greater signals too big for 64 bits
122             }
123              
124             # given the number of elements in a Collection, figure out what the appropriate
125             # class is to represent it. NB that only Byte/Short/Medium/Long are allowed, we
126             # don't allow Huge numbers of elements in a Collection
127             sub _sub_type_for_collection_of_length {
128 65720     65720   3238413 my($class, $length) = @_;
129 65720         150474 my $bytes = $class->_bytes_required_for_int($length);
130 65720 50       336621 return $bytes == 1 ? 'Byte' :
    100          
    100          
    100          
131             $bytes == 2 ? 'Short' :
132             $bytes == 3 ? 'Medium' :
133             $bytes == 4 ? 'Long' :
134             undef;
135             }
136              
137             # given a blob of text, figure out its type
138             sub _text_type_for_data {
139 65680     65680   148183 my($class, $data) = @_;
140 65680         98744 return 'Text::'.do {
141 65680 50       210016 $class->_sub_type_for_collection_of_length(
142             length(Data::CompactReadonly::V0::Text->_text_to_bytes($data))
143             ) || die("$class: Invalid: Text too long");
144             };
145             }
146              
147             # work out what node type is required to represent a piece of data
148             sub _type_map_from_data {
149 181     181   405 my($class, $data) = @_;
150             return !defined($data)
151             ? 'Scalar::Null' :
152             (bool_supported && is_bool($data))
153             ? 'Scalar::'.($data ? 'True' : 'False') :
154             ref($data) eq 'ARRAY'
155 21 50       171 ? 'Array::'.do { $class->_sub_type_for_collection_of_length(1 + $#{$data}) ||
  21         83  
156             die("$class: Invalid: Array too long");
157             } :
158             ref($data) eq 'HASH'
159 19 50       192 ? 'Dictionary::'.do { $class->_sub_type_for_collection_of_length(scalar(keys %{$data})) ||
  19         112  
160             die("$class: Invalid: Dictionary too long");
161             } :
162             is_integer($data)
163 181 0 33     790 ? do {
    50          
    100          
    100          
    100          
    100          
    50          
    100          
164 58 100       1417 my $neg = $data < 0 ? 'Negative' : '';
165 58         188 my $bytes = $class->_bytes_required_for_int(abs($data));
166 58 50       357 $bytes == 1 ? "Scalar::${neg}Byte" :
    100          
    100          
    100          
    100          
167             $bytes == 2 ? "Scalar::${neg}Short" :
168             $bytes == 3 ? "Scalar::${neg}Medium" :
169             $bytes == 4 ? "Scalar::${neg}Long" :
170             $bytes < 9 ? "Scalar::${neg}Huge"
171             : "Scalar::Float64"
172             } :
173             is_number($data)
174             ? 'Scalar::Float64' :
175             !ref($data)
176             ? $class->_text_type_for_data($data)
177             : die("Can't yet create from '$data'\n");
178             }
179              
180             my $type_by_bits = {
181             0b00 => 'Text',
182             0b01 => 'Array',
183             0b10 => 'Dictionary',
184             0b11 => 'Scalar'
185             };
186             my $subtype_by_bits = {
187             0b0000 => 'Byte', 0b0001 => 'NegativeByte',
188             0b0010 => 'Short', 0b0011 => 'NegativeShort',
189             0b0100 => 'Medium', 0b0101 => 'NegativeMedium',
190             0b0110 => 'Long', 0b0111 => 'NegativeLong',
191             0b1000 => 'Huge', 0b1001 => 'NegativeHuge',
192             0b1010 => 'Null',
193             0b1011 => 'Float64',
194             0b1100 => 'True',
195             0b1101 => 'False',
196             (map { $_ => 'Reserved' } (0b1110 .. 0b1111))
197             };
198             my $bits_by_type = { reverse %{$type_by_bits} };
199             my $bits_by_subtype = { reverse %{$subtype_by_bits} };
200              
201             # used by classes when serialising themselves to figure out what their
202             # type specifier byte should be
203             sub _type_byte_from_class {
204 65800     65800   118382 my $class = shift;
205 65800         331329 $class =~ /.*::([^:]+)::([^:]+)/;
206 65800         232913 my($type, $subtype) = ($1, $2);
207             return chr(
208             ($bits_by_type->{$type} << 6) +
209 65800         360523 ($bits_by_subtype->{$subtype} << 2)
210             );
211             }
212              
213             # work out what node type is represented by a given node specifier byte
214             sub _type_map_from_byte {
215 822     822   1313 my $class = shift;
216 822         1435 my $in_type = ord(shift());
217              
218 822         2086 my $type = $type_by_bits->{$in_type >> 6};
219 822         1755 my $scalar_type = $subtype_by_bits->{($in_type & 0b111100) >> 2};
220              
221 822 100       1776 die(sprintf("$class: Invalid type: 0b%08b: Reserved\n", $in_type))
222             if($scalar_type eq 'Reserved');
223 818 100 100     4052 die(sprintf("$class: Invalid type: 0b%08b: length $scalar_type\n", $in_type))
224             if($type ne 'Scalar' && $scalar_type =~ /^(Null|Float64|Negative|Huge|True|False)/);
225 809         2961 return join('::', $type, $scalar_type);
226             }
227              
228             # get a class name (having loaded the relevant class) either from_data
229             # (when writing a file) or from_byte (when reading a file)
230             sub _type_class {
231 1003     1003   2624 my($class, $from, $in_type) = @_;
232 1003         2240 my $map_method = "_type_map_$from";
233 1003         2977 my $type_name = "Data::CompactReadonly::V0::".$class->$map_method($in_type);
234 990 100       11352 unless($type_name->VERSION()) {
235 9     9   5324 eval "use $type_name";
  9     8   91  
  9         227  
  8         4909  
  8         28  
  8         173  
  64         5979  
236 64 50       343 die($@) if($@);
237             }
238 990         4477 return $type_name;
239             }
240              
241             # read N bytes from the current offset
242             sub _bytes_at_current_offset {
243 3161     3161   5906 my($self, $bytes) = @_;
244             # for performance, cache the filehandle in this object
245 3161   33     6990 $self->{_fh} ||= $self->_fh();
246 3161         5725 my $tell = tell($self->{_fh});
247 3161         32299 my $chars_read = read($self->{_fh}, my $data, $bytes);
248              
249 3161 50       10261 if(!defined($chars_read)) {
    100          
250 0         0 die(
251             "$self: read() failed to read $bytes bytes at offset $tell: $!\n".
252             Devel::StackTrace->new()->as_string()
253             );
254             } elsif($chars_read != $bytes) {
255 1         10 die(
256             "$self: read() tried to read $bytes bytes at offset $tell, got $chars_read: $!\n".
257             Devel::StackTrace->new()->as_string()
258             );
259             }
260 3160         11421 return $data;
261             }
262              
263             # this is a monstrous evil - TODO instantiate classes when writing!
264             # seek to a particular point in the *database* (not in the file). If the
265             # pointer has gone too far for the current pointer size, die. This will be
266             # caught in Data::CompactReadonly::V0->create(), the pointer size incremented, and it will
267             # try again from the start
268             sub _seek {
269 198853     198853   387600 my $self = shift;
270 198853 100       507872 if($#_ == 0) { # for when reading
271 1769         2831 my $to = shift;
272             # for performance, cache the filehandle in this object
273 1769   66     4529 $self->{_fh} ||= $self->_fh();
274 1769         3824 seek($self->{_fh}, $self->_db_base() + $to, SEEK_SET);
275             } else { # for when writing
276 197084         582903 my %args = @_;
277             die($self->_ptr_blown())
278 197084 100       625664 if($args{pointer} >= 256 ** $args{ptr_size});
279 197080         1778622 seek($args{fh}, $args{pointer}, SEEK_SET);
280             }
281             }
282              
283 8     8   15661 sub _ptr_blown { "pointer out of range" }
284              
285             # the offset of the current node
286             sub _offset {
287 1063     1063   1799 my $self = shift;
288 1063         3119 return $self->{offset};
289             }
290              
291             sub _root {
292 5621     5621   8470 my $self = shift;
293 5621         34893 return $self->{root};
294             }
295              
296             # the filehandle, currently only used when reading, see the TODO above
297             # for _seek
298             sub _fh {
299 191     191   348 my $self = shift;
300 191         426 return $self->_root()->{fh};
301             }
302              
303             sub _ptr_size {
304 1539     1539   3043 my $self = shift;
305 1539         2976 return $self->_root()->{ptr_size};
306             }
307              
308             1;