File Coverage

blib/lib/Compress/BraceExpansion.pm
Criterion Covered Total %
statement 158 176 89.7
branch 63 76 82.8
condition 6 11 54.5
subroutine 14 16 87.5
pod 3 3 100.0
total 244 282 86.5


line stmt bran cond sub pod time code
1             package Compress::BraceExpansion;
2              
3 7     7   188263 use warnings;
  7         16  
  7         236  
4 7     7   36 use strict;
  7         12  
  7         220  
5              
6 7     7   3070 use Data::Dumper;
  7         32780  
  7         615  
7              
8             our $VERSION = '0.1.7';
9              
10              
11 7     7   6293 use Class::Std::Utils;
  7         31709  
  7         43  
12             {
13             my %strings_of;
14             my %tree_of;
15             my %pointers_of;
16             my %pointer_id_of;
17             my %debug_of;
18              
19             sub new {
20 67     67 1 54696 my ($class, $arg_ref, @strings ) = @_;
21              
22 67         837 my $new_object = bless anon_scalar( ), $class;
23              
24             # initialize arguments
25 67 100 66     670 if ( $arg_ref && ref $arg_ref eq "HASH" ) {
    50 33        
    0          
26             # initialized with a hash of config options
27 54         247 $strings_of{ident $new_object} = $arg_ref->{strings};
28             }
29             elsif ( $arg_ref && ref $arg_ref eq "ARRAY" ) {
30             # initialized with an array of strings
31 13         58 $strings_of{ident $new_object} = $arg_ref;
32             }
33             elsif ( @strings ) {
34             # initialized with an array
35 0         0 $strings_of{ident $new_object} = [ $arg_ref, @strings ];
36             }
37             else {
38 0         0 die "ERROR: No strings specified - call new() with a hash ref or array ref";
39             }
40              
41             # initial pointer id
42 67         187 $pointer_id_of{ident $new_object} = 1000;
43 67         187 $pointers_of{ident $new_object} = {};
44              
45 67         186 return $new_object;
46             }
47              
48             # attempt compression
49             sub shrink {
50 24     24 1 68 my ( $self ) = @_;
51              
52 24 50       80 unless ( $strings_of{ident $self} ) {
53 0         0 die "Error: No strings - define strings in new()";
54             }
55 24         29 my @strings = @{ $strings_of{ident $self} };
  24         89  
56              
57 24 50       73 if ( $debug_of{ident $self} ) {
58 0         0 print "STRINGS: ", join ( " ", @strings ), "\n";
59 0         0 print Dumper \@strings;
60 0         0 print "\n";
61             }
62              
63             # build the tree
64 24         54 $self->_build_tree( );
65 24 50       66 if ( $debug_of{ident $self} ) {
66 0         0 print "TREE BUILT:\n";
67 0         0 print Dumper $tree_of{ident $self};
68 0         0 print "\n";
69             }
70              
71             # merge the main tree
72 24         122 $tree_of{ident $self} = $self->_merge_tree_recurse( $tree_of{ident $self} );
73              
74             # merge the pointers
75 24         27 for my $branch ( keys %{ $pointers_of{ident $self} } ) {
  24         100  
76 10         35 $pointers_of{ident $self}->{$branch} = $self->_merge_tree_recurse( $pointers_of{ident $self}->{$branch} );
77             }
78 24 50       114 if ( $debug_of{ident $self} ) {
79 0         0 print "TREE MERGED:\n";
80 0         0 print Dumper $tree_of{ident $self};
81 0         0 print Dumper $pointers_of{ident $self};
82 0         0 print "\n";
83             }
84              
85 24         79 return scalar $self->_print_tree_recurse( $tree_of{ident $self}->{'ROOT'} );
86              
87             }
88              
89             # given an array of strings, walk through a build a data tree to
90             # represent the strings. Each string will be split into a hash where
91             # each layer of the hash represents one character in the string. For
92             # example, abc will be represented as:
93             #
94             # { a => { b => { c => { end => 1 } } } }
95             #
96             sub _build_tree {
97 37     37   90 my ( $self ) = @_;
98 37         97 my $tree_h = { ROOT => {} };
99 37         53 for my $text ( @{ $strings_of{ident $self} } ) {
  37         115  
100 86         125 my $pointer = $tree_h->{'ROOT'};
101 86         199 for my $character_count ( 0 .. length( $text )-1 ) {
102 409         530 my $character = substr( $text, $character_count, 1 );
103 409 100       1140 $pointer->{ $character } = {} unless $pointer->{ $character };
104             # if leaf node
105 409 100       892 if ( $character_count == length( $text ) - 1 ) {
106 86         182 $pointer->{ $character }->{'end'} = 1;
107             }
108 409         705 $pointer = $pointer->{ $character };
109             }
110 86         187 $pointer = $text;
111             }
112 37         170 $tree_of{ident $self} = $tree_h;
113             }
114              
115             # given a data tree, recurse through and print the structure.
116             sub _print_tree_recurse {
117             #my ( $buffer, $tree_h, $main_tree ) = @_;
118 488     488   779 my ( $self, $tree_h, $buffer ) = @_;
119 488 100       1151 return unless ref $tree_h eq 'HASH';
120              
121 486         583 my @nodes = sort keys %{ $tree_h };
  486         1598  
122 486 50       1233 return ( $buffer ) if @nodes == 0;
123 486         531 my $pointer;
124              
125 486 100       937 if ( @nodes == 1 ) {
    50          
126 449 100       900 if ( $nodes[0] eq 'POINTER' ) {
127 23         76 return ( $buffer, $tree_h->{ $nodes[0] } );
128             } else {
129 426         681 for my $node ( @nodes ) {
130 426 100       743 if ( $node eq 'end' ) {
131 125         332 $buffer .= "";
132             } else {
133 301         389 $buffer .= $node;
134 301         278 my $lbuffer;
135 301         721 ( $lbuffer, $pointer ) = $self->_print_tree_recurse( $tree_h->{$node} );
136 301 100       788 if ( defined $lbuffer ) {
137 292         759 $buffer .= "$lbuffer";
138             }
139             }
140             }
141             }
142             } elsif ( @nodes > 1 ) {
143 37         50 $buffer .= "{";
144 37         45 my ( @bits );
145 37         49 for my $node ( @nodes ) {
146 83 50       167 next if $node eq 'POINTERS';
147 83 50       193 if ( $node eq 'POINTER' ) {
    100          
148 0         0 $pointer = $tree_h->{$node};
149             } elsif ( $node eq 'end' ) {
150 2         7 push @bits, "";
151             } else {
152 81         95 my $lbuffer;
153 81         199 ( $lbuffer, $pointer ) = $self->_print_tree_recurse( $tree_h->{$node}, $node );
154 81         255 push @bits, $lbuffer;
155             }
156             }
157 37         94 $buffer .= join ",", @bits;
158 37         49 $buffer .= "}";
159              
160 37 100 66     146 if ( $pointer && $pointers_of{ident $self}->{ $pointer } ) {
161 10         45 my $output = $self->_print_tree_recurse( $pointers_of{ident $self}->{ $pointer } );
162 10         15 $buffer .= $output;
163 10         38 delete $tree_of{ident $self}->{ $pointer };
164 10         18 $pointer = undef;
165             }
166             }
167 463 100       1071 if (wantarray( )) {
168             # list context - only really useful when being called from within
169             # a recursion.
170 359         1140 return ( $buffer, $pointer );
171             }
172              
173 104         442 return $buffer;
174             }
175              
176             # walk through the tree looking for ends that are identical. If
177             # identical ends are found on all branches, copy the branch off to a
178             # temporary branch location and replace the originals with a link to
179             # the new location. Currently this only handles the cases where all
180             # branches are identical from some point until the end of the strings.
181             sub _merge_tree_recurse {
182 184     184   362 my ( $self, $tree, $root ) = @_;
183              
184 184 100       343 unless ( $root ) { $root = $tree };
  47         55  
185              
186 184         176 my @nodes = keys %{ $tree };
  184         516  
187 184 100       443 if ( @nodes == 1 ) {
    50          
188 152 100       331 return ( $tree, $root ) if $nodes[0] eq 'end';
189 137         373 ( $tree ) = $self->_merge_tree_recurse( $tree->{ $nodes[0] }, $root );
190             } elsif ( @nodes > 1 ) {
191 32         40 my @paths;
192 32         58 for my $node ( @nodes ) {
193 59         153 my $text = $self->_print_tree_recurse( $tree->{$node} );
194 59 100       186 return ( $tree, $root ) unless $text;
195 47         99 push @paths, $text;
196             }
197              
198             # check for merge points in the tree. if they exist,
199             # transplant them.
200 20         58 my $depth = _check_merge_point( @paths );
201 20 100       52 if ( defined( $depth ) ) {
202             #print "\n\n";
203             #print "Merging at depth: $depth\n";
204             #print Dumper @paths;
205             #print "\n\n";
206 15   50     57 $tree = $self->_transplant( $tree, $depth||1 );
207             }
208             }
209              
210 157 100       321 if (wantarray( )) {
211             # list context - only really useful when being called
212             # within a recursion
213 124         276 return( $tree, $root );
214             }
215              
216 33         124 return $root;
217             }
218              
219              
220             # given a data tree, a set of paths within that tree, and the depth
221             # beyond which they are all identical, clone the paths and relocate
222             # the identical branches on the POINTERS node. Remove the specified
223             # paths and replace them with a link to the new location.
224             sub _transplant {
225 21     21   56 my ( $self, $tree_h, $depth ) = @_;
226              
227 21         25 my @nodes = keys %{ $tree_h };
  21         68  
228              
229 21         62 my $id = $self->_get_new_pointer_id();
230             #print "\nID: $id\n";
231 21         26 my $pruned;
232              
233 21         40 for my $node ( @nodes ) {
234 41         47 my ( $depth_pointer, $next_node );
235 41 100       83 if ( $depth > 1 ) {
236 14         23 $depth_pointer = $tree_h->{ $node };
237 14         18 $next_node = (keys %{ $depth_pointer })[0];
  14         30  
238 14 50       39 die "tried to transplant past end of tree" if $next_node eq 'end';
239 14 100       32 if ( $depth > 2 ) {
240 6         15 for my $depth ( 2 .. $depth - 1) {
241 11         49 $depth_pointer = $depth_pointer->{ $next_node };
242 11         13 $next_node = (keys %{ $depth_pointer })[0];
  11         22  
243 11 100       60 die "tried to transplant past end of tree" if $next_node eq 'end';
244             #print "DEPTH:\n";
245             #print Dumper $depth_pointer;
246             }
247             }
248             } else {
249 27         31 $depth_pointer = $tree_h;
250 27         38 $next_node = $node;
251             }
252              
253             # if this is the end of the tree, give up trying
254 39         64 my $child_node = $depth_pointer->{ $next_node };
255 39         43 my $child_node_name = (keys %{ $depth_pointer->{ $next_node } })[0];
  39         85  
256 39 100       96 if ( $child_node_name eq 'end' ) {
257 1         12 die "Error: Tried to transplant end of tree";
258             }
259              
260 38 100       126 unless ( $pruned ) {
261 18         30 $pruned = $depth_pointer->{ $next_node };
262             #print "PRUNED:\n";
263             #print Dumper $pruned;
264             }
265 38         199 $depth_pointer->{ $next_node } = { POINTER => $id };
266             }
267 18         75 $pointers_of{ident $self}->{ $id } = $pruned;
268              
269 18         80 return ( $tree_h );
270             }
271              
272             # given a series of strings, determine the longest number of
273             # characters that all strings have in common beginning from the tail
274             # end. Return the number of characters from the current location
275             # (which will represent the number of hash levels deep) where the
276             # similar strings begin.
277             sub _check_merge_point {
278 40     40   8121 my ( @strings ) = @_;
279              
280             # search for the longest substring from the end that all strings
281             # match.
282 40         65 my $base = $strings[0];
283 40         63 my $base_length = length( $base );
284 40         48 my $length = $base_length;
285 40         97 while ( $length ) {
286 81         91 my @ends;
287 81         121 for my $string ( @strings ) {
288 194 100       411 return unless length( $string ) eq $base_length;
289 192         435 my $end = substr( $string, $base_length - $length, $length );
290 192         380 push @ends, $end;
291             }
292 79 100       174 if ( _check_array_values_equal( @ends ) ) {
293 30         142 return $base_length - $length + 1;
294             }
295 49         134 $length--;
296             }
297 8         32 return;
298             }
299              
300             # given an array of strings, check that if strings are the same.
301             sub _check_array_values_equal {
302 79     79   165 my ( @array ) = @_;
303              
304 79         100 my $base = $array[0];
305 79         118 for my $array ( @array ) {
306 170 100       471 return unless $array eq $base;
307             }
308 30         97 return 1;
309             }
310              
311             sub _get_root {
312 0     0   0 my ( $self ) = @_;
313 0         0 return $tree_of{ident $self};
314             }
315              
316             sub _get_new_pointer_id {
317 21     21   34 my ( $self ) = @_;
318 21         59 $pointer_id_of{ident $self}++;
319 21         87 return "PID:" . $pointer_id_of{ident $self};
320             }
321              
322             sub _get_pointers {
323 16     16   21630 my ( $self ) = @_;
324 16 100       21 if ( keys %{ $pointers_of{ident $self} } ) {
  16         94  
325 8         51 return $pointers_of{ident $self};
326             }
327 8         16 return;
328             }
329              
330             sub enable_debug {
331 0     0 1   my ( $self ) = @_;
332 0           $debug_of{ident $self} = 1;
333             }
334              
335             }
336              
337             #
338             # next generation idea
339             #
340             # 1. add weights to each node in graph based on how many strings pass
341             # through each node
342             # 2. test collapses around nodes with highest weights
343             # 3. develop an api of collapsing strategies
344             # 4. autogenerated test cases - expand in shell - compare efficiency
345             #
346             #
347              
348              
349             1;
350              
351             __END__