File Coverage

blib/lib/Data/CompactReadonly/V0/Node.pm
Criterion Covered Total %
statement 120 121 99.1
branch 64 74 86.4
condition 8 12 66.6
subroutine 34 34 100.0
pod n/a
total 226 241 93.7


line stmt bran cond sub pod time code
1             package Data::CompactReadonly::V0::Node;
2             our $VERSION = '0.0.6';
3              
4 18     18   4645 use warnings;
  18         43  
  18         412  
5 17     17   4109 use strict;
  17         38  
  17         323  
6              
7 14     14   2658 use Fcntl qw(:seek);
  14         62  
  14         1081  
8 14     14   6124 use Scalar::Type qw(is_*);
  14         6977  
  14         131  
9              
10 14     14   7791 use Devel::StackTrace;
  14         26641  
  14         327  
11 14     14   6501 use Data::CompactReadonly::V0::Text;
  14         162  
  14         384  
12 13     13   5580 use Data::Dumper;
  13         32734  
  13         4927  
13              
14             # return the root node. assumes the $fh is pointing at the start of the node header
15             sub _init {
16 109     109   450 my($class, %args) = @_;
17 109         266 my $self = bless(\%args, $class);
18 109         409 $self->{root} = $self;
19 109         297 return $self->_node_at_current_offset();
20             }
21              
22             # write the root node to the file and, recursively, its children
23             sub _create {
24 173     173   672 my($class, %args) = @_;
25 173 50       464 die("fell through to Data::CompactReadonly::V0::Node::_create when creating a $class\n")
26             if($class ne __PACKAGE__);
27              
28             $class->_type_class(
29             from_data => $args{data}
30 173         469 )->_create(%args);
31             }
32              
33             # stash (in memory) of everything that we've seen while writing the database,
34             # with a pointer to their location in the file so that it can be re-used. We
35             # even stash stringified Dicts/Arrays, which can eat a TON of memory. Yes, we
36             # seem to need to local()ise the config vars in each sub.
37             sub _stash_already_seen {
38 65783     65783   149377 my($class, %args) = @_;
39 65783         118732 local $Data::Dumper::Indent = 0;
40 65783         83900 local $Data::Dumper::Sortkeys = 1;
41 65783 100       104324 if(defined($args{data})) {
42             $args{globals}->{already_seen}->{d}->{
43             ref($args{data}) ? Dumper($args{data}) : $args{data}
44 65776 100       744919 } = tell($args{fh});
45             } else {
46 7         64 $args{globals}->{already_seen}->{u} = tell($args{fh});
47             }
48             }
49              
50             # look in the stash for data that we've seen before and get a pointer to it
51             sub _get_already_seen {
52 131312     131312   353741 my($class, %args) = @_;
53 131312         197867 local $Data::Dumper::Indent = 0;
54 131312         176192 local $Data::Dumper::Sortkeys = 1;
55             return defined($args{data})
56             ? $args{globals}->{already_seen}->{d}->{
57             ref($args{data}) ? Dumper($args{data}) : $args{data}
58             }
59 131312 100       578535 : $args{globals}->{already_seen}->{u};
    100          
60             }
61              
62             sub _get_next_free_ptr {
63 131460     131460   309400 my($class, %args) = @_;
64 131460         395952 return $args{globals}->{next_free_ptr};
65             }
66              
67             sub _set_next_free_ptr {
68 65783     65783   216487 my($class, %args) = @_;
69 65783         646737 $args{globals}->{next_free_ptr} = tell($args{fh});
70             }
71              
72             # in case the database isn't at the beginning of a file, eg in __DATA__
73             sub _db_base {
74 2489     2489   3071 my $self = shift;
75 2489         3473 return $self->_root()->{db_base};
76             }
77              
78             sub _fast_collections {
79 53     53   77 my $self = shift;
80 53         106 return $self->_root()->{'fast_collections'};
81             }
82              
83             sub _tied {
84 104     104   157 my $self = shift;
85 104         167 return $self->_root()->{'tie'};
86             }
87              
88             # figure out what type the node is from the node specifier byte, then call
89             # the class's _init to get it to read itself from the db
90             sub _node_at_current_offset {
91 795     795   1159 my $self = shift;
92              
93             # for performance, cache the filehandle in this object
94 795   66     1715 $self->{_fh} ||= $self->_fh();
95 795         1447 my $type_class = $self->_type_class(from_byte => $self->_bytes_at_current_offset(1));
96 779         1683 return $type_class->_init(root => $self->_root(), offset => tell($self->{_fh}) - $self->_db_base());
97             }
98              
99             # what's the minimum number of bytes required to store this int?
100             sub _bytes_required_for_int {
101 13     13   2362 no warnings 'portable'; # perl worries about 32 bit machines. I don't.
  13         37  
  13         10883  
102 65767     65767   204500 my($class, $int) = @_;
103             return
104 65767 50       112552 $int <= 0xff ? 1 : # Byte
    100          
    100          
    100          
    100          
105             $int <= 0xffff ? 2 : # Short
106             $int <= 0xffffff ? 3 : # Medium
107             $int <= 0xffffffff ? 4 : # Long
108             $int <= 0xffffffffffffffff ? 8 : # Huge
109             9; # 9 or greater signals too big for 64 bits
110             }
111              
112             # given the number of elements in a Collection, figure out what the appropriate
113             # class is to represent it. NB that only Byte/Short/Medium/Long are allowed, we
114             # don't allow Huge numbers of elements in a Collection
115             sub _sub_type_for_collection_of_length {
116 65707     65707   2511129 my($class, $length) = @_;
117 65707         122311 my $bytes = $class->_bytes_required_for_int($length);
118 65707 50       246913 return $bytes == 1 ? 'Byte' :
    100          
    100          
    100          
119             $bytes == 2 ? 'Short' :
120             $bytes == 3 ? 'Medium' :
121             $bytes == 4 ? 'Long' :
122             undef;
123             }
124              
125             # given a blob of text, figure out its type
126             sub _text_type_for_data {
127 65667     65667   120957 my($class, $data) = @_;
128 65667         75648 return 'Text::'.do {
129 65667 50       180527 $class->_sub_type_for_collection_of_length(
130             length(Data::CompactReadonly::V0::Text->_text_to_bytes($data))
131             ) || die("$class: Invalid: Text too long");
132             };
133             }
134              
135             # work out what node type is required to represent a piece of data. At least in
136             # the case of numbers it might be better to look at the SV, as this won't distinguish
137             # between 2 (the number) and "2" (the string).
138             sub _type_map_from_data {
139 173     173   318 my($class, $data) = @_;
140             return !defined($data)
141             ? 'Scalar::Null' :
142             ref($data) eq 'ARRAY'
143 21 50       36 ? 'Array::'.do { $class->_sub_type_for_collection_of_length(1 + $#{$data}) ||
  21         65  
144             die("$class: Invalid: Array too long");
145             } :
146             ref($data) eq 'HASH'
147 19 50       35 ? 'Dictionary::'.do { $class->_sub_type_for_collection_of_length(scalar(keys %{$data})) ||
  19         71  
148             die("$class: Invalid: Dictionary too long");
149             } :
150             is_integer($data)
151 173 50       804 ? do {
    100          
    100          
    100          
    100          
    100          
152 54 100       793 my $neg = $data < 0 ? 'Negative' : '';
153 54         145 my $bytes = $class->_bytes_required_for_int(abs($data));
154 54 50       259 $bytes == 1 ? "Scalar::${neg}Byte" :
    100          
    100          
    100          
    100          
155             $bytes == 2 ? "Scalar::${neg}Short" :
156             $bytes == 3 ? "Scalar::${neg}Medium" :
157             $bytes == 4 ? "Scalar::${neg}Long" :
158             $bytes < 9 ? "Scalar::${neg}Huge"
159             : "Scalar::Float"
160             } :
161             is_number($data)
162             ? 'Scalar::Float' :
163             !ref($data)
164             ? $class->_text_type_for_data($data)
165             : die("Can't yet create from '$data'\n");
166             }
167              
168             my $type_by_bits = {
169             0b00 => 'Text',
170             0b01 => 'Array',
171             0b10 => 'Dictionary',
172             0b11 => 'Scalar'
173             };
174             my $subtype_by_bits = {
175             0b0000 => 'Byte', 0b0001 => 'NegativeByte',
176             0b0010 => 'Short', 0b0011 => 'NegativeShort',
177             0b0100 => 'Medium', 0b0101 => 'NegativeMedium',
178             0b0110 => 'Long', 0b0111 => 'NegativeLong',
179             0b1000 => 'Huge', 0b1001 => 'NegativeHuge',
180             0b1010 => 'Null',
181             0b1011 => 'Float',
182             (map { $_ => 'Reserved' } (0b1100 .. 0b1111))
183             };
184             my $bits_by_type = { reverse %{$type_by_bits} };
185             my $bits_by_subtype = { reverse %{$subtype_by_bits} };
186              
187             # used by classes when serialising themselves to figure out what their
188             # type specifier byte should be
189             sub _type_byte_from_class {
190 65783     65783   93750 my $class = shift;
191 65783         257091 $class =~ /.*::([^:]+)::([^:]+)/;
192 65783         178037 my($type, $subtype) = ($1, $2);
193             return chr(
194             ($bits_by_type->{$type} << 6) +
195 65783         304231 ($bits_by_subtype->{$subtype} << 2)
196             );
197             }
198              
199             # work out what node type is represented by a given node specifier byte
200             sub _type_map_from_byte {
201 794     794   1080 my $class = shift;
202 794         1074 my $in_type = ord(shift());
203              
204 794         1647 my $type = $type_by_bits->{$in_type >> 6};
205 794         1457 my $scalar_type = $subtype_by_bits->{($in_type & 0b111100) >> 2};
206              
207 794 100       1544 die(sprintf("$class: Invalid type: 0b%08b: Reserved\n", $in_type))
208             if($scalar_type eq 'Reserved');
209 786 100 100     3147 die(sprintf("$class: Invalid type: 0b%08b: length $scalar_type\n", $in_type))
210             if($type ne 'Scalar' && $scalar_type =~ /^(Null|Float|Negative|Huge)/);
211 779         2133 return join('::', $type, $scalar_type);
212             }
213              
214             # get a class name (having loaded the relevant class) either from_data
215             # (when writing a file) or from_byte (when reading a file)
216             sub _type_class {
217 967     967   1801 my($class, $from, $in_type) = @_;
218 967         1661 my $map_method = "_type_map_$from";
219 967         2441 my $type_name = "Data::CompactReadonly::V0::".$class->$map_method($in_type);
220 952 100       8328 unless($type_name->VERSION()) {
221 9     9   4722 eval "use $type_name";
  9     8   59  
  9         194  
  8         4128  
  8         23  
  8         149  
  62         4852  
222 62 50       276 die($@) if($@);
223             }
224 952         3271 return $type_name;
225             }
226              
227             # read N bytes from the current offset
228             sub _bytes_at_current_offset {
229 3048     3048   4734 my($self, $bytes) = @_;
230             # for performance, cache the filehandle in this object
231 3048   33     5505 $self->{_fh} ||= $self->_fh();
232 3048         4550 my $tell = tell($self->{_fh});
233 3048         26554 my $chars_read = read($self->{_fh}, my $data, $bytes);
234              
235 3048 50       7706 if(!defined($chars_read)) {
    100          
236 0         0 die(
237             "$self: read() failed to read $bytes bytes at offset $tell: $!\n".
238             Devel::StackTrace->new()->as_string()
239             );
240             } elsif($chars_read != $bytes) {
241 1         9 die(
242             "$self: read() tried to read $bytes bytes at offset $tell, got $chars_read: $!\n".
243             Devel::StackTrace->new()->as_string()
244             );
245             }
246 3047         8656 return $data;
247             }
248              
249             # this is a monstrous evil - TODO instantiate classes when writing!
250             # seek to a particular point in the *database* (not in the file). If the
251             # pointer has gone too far for the current pointer size, die. This will be
252             # caught in Data::CompactReadonly::V0->create(), the pointer size incremented, and it will
253             # try again from the start
254             sub _seek {
255 198750     198750   298644 my $self = shift;
256 198750 100       384006 if($#_ == 0) { # for when reading
257 1708         2240 my $to = shift;
258             # for performance, cache the filehandle in this object
259 1708   66     3259 $self->{_fh} ||= $self->_fh();
260 1708         3145 seek($self->{_fh}, $self->_db_base() + $to, SEEK_SET);
261             } else { # for when writing
262 197042         464708 my %args = @_;
263             die($self->_ptr_blown())
264 197042 100       448221 if($args{pointer} >= 256 ** $args{ptr_size});
265 197038         1359377 seek($args{fh}, $args{pointer}, SEEK_SET);
266             }
267             }
268              
269 8     8   12248 sub _ptr_blown { "pointer out of range" }
270              
271             # the offset of the current node
272             sub _offset {
273 1028     1028   1410 my $self = shift;
274 1028         2489 return $self->{offset};
275             }
276              
277             sub _root {
278 5434     5434   6627 my $self = shift;
279 5434         27630 return $self->{root};
280             }
281              
282             # the filehandle, currently only used when reading, see the TODO above
283             # for _seek
284             sub _fh {
285 189     189   285 my $self = shift;
286 189         407 return $self->_root()->{fh};
287             }
288              
289             sub _ptr_size {
290 1484     1484   2401 my $self = shift;
291 1484         2356 return $self->_root()->{ptr_size};
292             }
293              
294             1;