File Coverage

blib/lib/Data/CompactReadonly/V0/Node.pm
Criterion Covered Total %
statement 116 117 99.1
branch 72 82 87.8
condition 8 12 66.6
subroutine 33 33 100.0
pod n/a
total 229 244 93.8


line stmt bran cond sub pod time code
1             package Data::CompactReadonly::V0::Node;
2             our $VERSION = '0.0.5';
3              
4 18     18   5507 use warnings;
  18         50  
  18         465  
5 17     17   4818 use strict;
  17         47  
  17         364  
6              
7 14     14   3114 use Fcntl qw(:seek);
  14         33  
  14         1249  
8              
9 14     14   7634 use Devel::StackTrace;
  14         31146  
  14         406  
10 14     14   7496 use Data::CompactReadonly::V0::Text;
  14         45  
  14         477  
11 14     14   7222 use Data::Dumper;
  14         38846  
  14         5709  
12              
13             # return the root node. assumes the $fh is pointing at the start of the node header
14             sub _init {
15 102     102   521 my($class, %args) = @_;
16 102         291 my $self = bless(\%args, $class);
17 102         372 $self->{root} = $self;
18 102         293 return $self->_node_at_current_offset();
19             }
20              
21             # write the root node to the file and, recursively, its children
22             sub _create {
23 166     166   995 my($class, %args) = @_;
24 166 50       506 die("fell through to Data::CompactReadonly::V0::Node::_create when creating a $class\n")
25             if($class ne __PACKAGE__);
26              
27             $class->_type_class(
28             from_data => $args{data}
29 166         560 )->_create(%args);
30             }
31              
32             # stash (in memory) of everything that we've seen while writing the database,
33             # with a pointer to their location in the file so that it can be re-used. We
34             # even stash stringified Dicts/Arrays, which can eat a TON of memory. Yes, we
35             # seem to need to local()ise the config vars in each sub.
36             sub _stash_already_seen {
37 65776     65776   194094 my($class, %args) = @_;
38 65776         125339 local $Data::Dumper::Indent = 0;
39 65776         103169 local $Data::Dumper::Sortkeys = 1;
40 65776 100       144035 if(defined($args{data})) {
41             $args{globals}->{already_seen}->{d}->{
42             ref($args{data}) ? Dumper($args{data}) : $args{data}
43 65769 100       909450 } = tell($args{fh});
44             } else {
45 7         80 $args{globals}->{already_seen}->{u} = tell($args{fh});
46             }
47             }
48              
49             # look in the stash for data that we've seen before and get a pointer to it
50             sub _get_already_seen {
51 131312     131312   449473 my($class, %args) = @_;
52 131312         246818 local $Data::Dumper::Indent = 0;
53 131312         210216 local $Data::Dumper::Sortkeys = 1;
54             return defined($args{data})
55             ? $args{globals}->{already_seen}->{d}->{
56             ref($args{data}) ? Dumper($args{data}) : $args{data}
57             }
58 131312 100       703085 : $args{globals}->{already_seen}->{u};
    100          
59             }
60              
61             sub _get_next_free_ptr {
62 131460     131460   375308 my($class, %args) = @_;
63 131460         495479 return $args{globals}->{next_free_ptr};
64             }
65              
66             sub _set_next_free_ptr {
67 65776     65776   277552 my($class, %args) = @_;
68 65776         780312 $args{globals}->{next_free_ptr} = tell($args{fh});
69             }
70              
71             # in case the database isn't at the beginning of a file, eg in __DATA__
72             sub _db_base {
73 2472     2472   3706 my $self = shift;
74 2472         4257 return $self->_root()->{db_base};
75             }
76              
77             sub _fast_collections {
78 53     53   96 my $self = shift;
79 53         112 return $self->_root()->{'fast_collections'};
80             }
81              
82             sub _tied {
83 104     104   200 my $self = shift;
84 104         214 return $self->_root()->{'tie'};
85             }
86              
87             # figure out what type the node is from the node specifier byte, then call
88             # the class's _init to get it to read itself from the db
89             sub _node_at_current_offset {
90 786     786   1397 my $self = shift;
91              
92             # for performance, cache the filehandle in this object
93 786   66     1941 $self->{_fh} ||= $self->_fh();
94 786         1791 my $type_class = $self->_type_class(from_byte => $self->_bytes_at_current_offset(1));
95 770         2043 return $type_class->_init(root => $self->_root(), offset => tell($self->{_fh}) - $self->_db_base());
96             }
97              
98             # what's the minimum number of bytes required to store this int?
99             sub _bytes_required_for_int {
100 13     13   2717 no warnings 'portable'; # perl worries about 32 bit machines. I don't.
  13         48  
  13         12979  
101 65762     65762   242963 my($class, $int) = @_;
102             return
103 65762 50       142245 $int <= 0xff ? 1 : # Byte
    100          
    100          
    100          
    100          
104             $int <= 0xffff ? 2 : # Short
105             $int <= 0xffffff ? 3 : # Medium
106             $int <= 0xffffffff ? 4 : # Long
107             $int <= 0xffffffffffffffff ? 8 : # Huge
108             9; # 9 or greater signals too big for 64 bits
109             }
110              
111             # given the number of elements in a Collection, figure out what the appropriate
112             # class is to represent it. NB that only Byte/Short/Medium/Long are allowed, we
113             # don't allow Huge numbers of elements in a Collection
114             sub _sub_type_for_collection_of_length {
115 65698     65698   3100908 my($class, $length) = @_;
116 65698         148607 my $bytes = $class->_bytes_required_for_int($length);
117 65698 50       296993 return $bytes == 1 ? 'Byte' :
    100          
    100          
    100          
118             $bytes == 2 ? 'Short' :
119             $bytes == 3 ? 'Medium' :
120             $bytes == 4 ? 'Long' :
121             undef;
122             }
123              
124             # given a blob of text, figure out its type
125             sub _text_type_for_data {
126 65658     65658   135923 my($class, $data) = @_;
127 65658         107379 return 'Text::'.do {
128 65658 50       195860 $class->_sub_type_for_collection_of_length(
129             length(Data::CompactReadonly::V0::Text->_text_to_bytes($data))
130             ) || die("$class: Invalid: Text too long");
131             };
132             }
133              
134             # work out what node type is required to represent a piece of data. At least in
135             # the case of numbers it might be better to look at the SV, as this won't distinguish
136             # between 2 (the number) and "2" (the string).
137             sub _type_map_from_data {
138 166     166   349 my($class, $data) = @_;
139             return !defined($data)
140             ? 'Scalar::Null' :
141             ref($data) eq 'ARRAY'
142 21 50       43 ? 'Array::'.do { $class->_sub_type_for_collection_of_length(1 + $#{$data}) ||
  21         85  
143             die("$class: Invalid: Array too long");
144             } :
145             ref($data) eq 'HASH'
146 19 50       47 ? 'Dictionary::'.do { $class->_sub_type_for_collection_of_length(scalar(keys %{$data})) ||
  19         117  
147             die("$class: Invalid: Dictionary too long");
148             } :
149             $data =~ /
150             ^-? # don't want to numify 00.7 (but 0.07 is fine)
151             ( 0 | [1-9][0-9]* ) # 0, or 1-9 followed by any number of digits
152             \. # decimal point
153             [0-9]*[1-9] # digits, must not end in zero
154             ([eE][+-]?[0-9]+)?$ # exponent
155             /x
156             ? 'Scalar::Float' :
157             $data =~ /
158             ^(-?) # don't want to numify 007
159             ( 0 | [1-9][0-9]* )$ # 0, or 1-9 followed by any number of digits
160             /x
161 166 50       3901 ? do {
    100          
    100          
    100          
    100          
    100          
162 58         172 my $bytes = $class->_bytes_required_for_int($2);
163 58 100       457 $bytes == 1 ? 'Scalar::'.($1 ? 'Negative' : '').'Byte' :
    100          
    100          
    100          
    100          
    50          
    100          
    100          
    100          
    100          
164             $bytes == 2 ? 'Scalar::'.($1 ? 'Negative' : '').'Short' :
165             $bytes == 3 ? 'Scalar::'.($1 ? 'Negative' : '').'Medium' :
166             $bytes == 4 ? 'Scalar::'.($1 ? 'Negative' : '').'Long' :
167             $bytes < 9 ? 'Scalar::'.($1 ? 'Negative' : '').'Huge' :
168             'Scalar::Float'
169             } :
170             !ref($data)
171             ? $class->_text_type_for_data($data)
172             : die("Can't yet create from '$data'\n");
173             }
174              
175             my $type_by_bits = {
176             0b00 => 'Text',
177             0b01 => 'Array',
178             0b10 => 'Dictionary',
179             0b11 => 'Scalar'
180             };
181             my $subtype_by_bits = {
182             0b0000 => 'Byte', 0b0001 => 'NegativeByte',
183             0b0010 => 'Short', 0b0011 => 'NegativeShort',
184             0b0100 => 'Medium', 0b0101 => 'NegativeMedium',
185             0b0110 => 'Long', 0b0111 => 'NegativeLong',
186             0b1000 => 'Huge', 0b1001 => 'NegativeHuge',
187             0b1010 => 'Null',
188             0b1011 => 'Float',
189             (map { $_ => 'Reserved' } (0b1100 .. 0b1111))
190             };
191             my $bits_by_type = { reverse %{$type_by_bits} };
192             my $bits_by_subtype = { reverse %{$subtype_by_bits} };
193              
194             # used by classes when serialising themselves to figure out what their
195             # type specifier byte should be
196             sub _type_byte_from_class {
197 65776     65776   113427 my $class = shift;
198 65776         328020 $class =~ /.*::([^:]+)::([^:]+)/;
199 65776         223633 my($type, $subtype) = ($1, $2);
200             return chr(
201             ($bits_by_type->{$type} << 6) +
202 65776         353297 ($bits_by_subtype->{$subtype} << 2)
203             );
204             }
205              
206             # work out what node type is represented by a given node specifier byte
207             sub _type_map_from_byte {
208 785     785   1275 my $class = shift;
209 785         1291 my $in_type = ord(shift());
210              
211 785         2005 my $type = $type_by_bits->{$in_type >> 6};
212 785         1707 my $scalar_type = $subtype_by_bits->{($in_type & 0b111100) >> 2};
213              
214 785 100       1762 die(sprintf("$class: Invalid type: 0b%08b: Reserved\n", $in_type))
215             if($scalar_type eq 'Reserved');
216 777 100 100     3819 die(sprintf("$class: Invalid type: 0b%08b: length $scalar_type\n", $in_type))
217             if($type ne 'Scalar' && $scalar_type =~ /^(Null|Float|Negative|Huge)/);
218 770         2630 return join('::', $type, $scalar_type);
219             }
220              
221             # get a class name (having loaded the relevant class) either from_data
222             # (when writing a file) or from_byte (when reading a file)
223             sub _type_class {
224 951     951   2198 my($class, $from, $in_type) = @_;
225 951         2090 my $map_method = "_type_map_$from";
226 951         2783 my $type_name = "Data::CompactReadonly::V0::".$class->$map_method($in_type);
227 936 100       9762 unless($type_name->VERSION()) {
228 13     13   8137 eval "use $type_name";
  13     8   42  
  13         288  
  8         5251  
  8         26  
  8         230  
  62         5791  
229 62 50       399 die($@) if($@);
230             }
231 936         4053 return $type_name;
232             }
233              
234             # read N bytes from the current offset
235             sub _bytes_at_current_offset {
236 3013     3013   5585 my($self, $bytes) = @_;
237             # for performance, cache the filehandle in this object
238 3013   33     6236 $self->{_fh} ||= $self->_fh();
239 3013         5066 my $tell = tell($self->{_fh});
240 3013         30567 my $chars_read = read($self->{_fh}, my $data, $bytes);
241              
242 3013 50       9281 if(!defined($chars_read)) {
    100          
243 0         0 die(
244             "$self: read() failed to read $bytes bytes at offset $tell: $!\n".
245             Devel::StackTrace->new()->as_string()
246             );
247             } elsif($chars_read != $bytes) {
248 1         12 die(
249             "$self: read() tried to read $bytes bytes at offset $tell, got $chars_read: $!\n".
250             Devel::StackTrace->new()->as_string()
251             );
252             }
253 3012         10349 return $data;
254             }
255              
256             # this is a monstrous evil - TODO instantiate classes when writing!
257             # seek to a particular point in the *database* (not in the file). If the
258             # pointer has gone too far for the current pointer size, die. This will be
259             # caught in Data::CompactReadonly::V0->create(), the pointer size incremented, and it will
260             # try again from the start
261             sub _seek {
262 198742     198742   365166 my $self = shift;
263 198742 100       457935 if($#_ == 0) { # for when reading
264 1700         2435 my $to = shift;
265             # for performance, cache the filehandle in this object
266 1700   66     4015 $self->{_fh} ||= $self->_fh();
267 1700         4146 seek($self->{_fh}, $self->_db_base() + $to, SEEK_SET);
268             } else { # for when writing
269 197042         534455 my %args = @_;
270             die($self->_ptr_blown())
271 197042 100       573028 if($args{pointer} >= 256 ** $args{ptr_size});
272 197038         1656546 seek($args{fh}, $args{pointer}, SEEK_SET);
273             }
274             }
275              
276 8     8   14080 sub _ptr_blown { "pointer out of range" }
277              
278             # the offset of the current node
279             sub _offset {
280 1022     1022   1704 my $self = shift;
281 1022         2993 return $self->{offset};
282             }
283              
284             sub _root {
285 5393     5393   7762 my $self = shift;
286 5393         33134 return $self->{root};
287             }
288              
289             # the filehandle, currently only used when reading, see the TODO above
290             # for _seek
291             sub _fh {
292 182     182   326 my $self = shift;
293 182         433 return $self->_root()->{fh};
294             }
295              
296             sub _ptr_size {
297 1480     1480   2785 my $self = shift;
298 1480         2638 return $self->_root()->{ptr_size};
299             }
300              
301             1;