File Coverage

lib/Math/String/Charset/Grouped.pm
Criterion Covered Total %
statement 239 265 90.1
branch 90 140 64.2
condition 24 40 60.0
subroutine 16 21 76.1
pod 10 13 76.9
total 379 479 79.1


line stmt bran cond sub pod time code
1             #############################################################################
2             # Math/String/Charset/Grouped.pm -- a charset of charsets for Math/String
3             #
4             # Copyright (C) 1999-2003 by Tels. All rights reserved.
5             #############################################################################
6              
7             package Math::String::Charset::Grouped;
8 6     6   23 use base Math::String::Charset;
  6         8  
  6         523  
9              
10 6     6   25 use vars qw($VERSION);
  6         10  
  6         309  
11             $VERSION = '1.29'; # Current version of this package
12             require 5.005; # requires this Perl version or later
13              
14 6     6   24 use strict;
  6         5  
  6         121  
15 6     6   21 use Math::BigInt;
  6         7  
  6         34  
16              
17 6     6   1574 use vars qw/$die_on_error/;
  6         10  
  6         13866  
18             $die_on_error = 1; # set to 0 to not die
19              
20             # following hash values are used:
21             # _clen : length of one character (all chars must have same len unless sep)
22             # _ones : list of one-character strings (cross of _end and _start)
23             # _start : contains array of all valid start characters
24             # _end : contains hash (for easier lookup) of all valid end characters
25             # _order : = 1
26             # _type : = 1
27             # _error : error message or ""
28             # _count : array of count of different strings with length x
29             # _sum : array of starting number for strings with length x
30             # _sum[x] = _sum[x-1]+_count[x-1]
31             # _cnt : number of elements in _count and _sum (as well as in _scnt & _ssum)
32             # _cnum : number of characters in _ones as BigInt (for speed)
33             # _minlen: minimum string length (anything shorter is invalid), default -inf
34             # _maxlen: maximum string length (anything longer is invalid), default +inf
35             # _scale : optional input/output scale
36              
37             # simple ones:
38             # _sep : separator string (undef for none)
39             # _map : mapping character to number
40              
41             # higher orders:
42             # _bi : hash with refs to array of bi-grams
43             # _bmap : hash with refs to hash of bi-grams
44             # _scnt : array of hashes, count of strings starting with this character
45              
46             # grouped:
47             # _spat : array with pattern of charsets 8for each stirnglen one ARRAY ref)
48              
49             #############################################################################
50             # private, initialize self
51              
52             sub _strict_check
53             {
54             # a per class check, to be overwritten by subclasses
55 8     8   10 my $self = shift;
56 8         20 my $value = shift;
57              
58 8         13 my $class = ref($self);
59             return $self->{_error} = "Wrong type '$self->{_type}' for $class"
60 8 50       22 if $self->{_type} != 1;
61             return $self->{_error} = "Wrong order'$self->{_order}' for $class"
62 8 50       19 if $self->{_order} != 1;
63 8         27 foreach my $key (keys %$value)
64             {
65 12 50       61 return $self->{_error} = "Illegal parameter '$key' for $class"
66             if $key !~ /^(start|minlen|maxlen|sep|sets|end|charlen|scale)$/;
67             }
68             }
69              
70             sub _initialize
71             {
72             # set yourself to the value represented by the given string
73 8     8   10 my $self = shift;
74 8         12 my $value = shift;
75              
76 8         14 $self->{_clen} = $value->{charlen};
77 8         13 $self->{_sep} = $value->{sep}; # separator char
78              
79             return $self->{_error} = "Need HASH ref as 'sets'"
80 8 50       24 if (ref($value->{sets}) ne 'HASH');
81              
82             # make copy at same time
83 8         9 foreach my $key (keys %{$value->{sets}})
  8         28  
84             {
85 23         48 $self->{_sets}->{$key} = $value->{sets}->{$key};
86             }
87              
88             # start/end are sets 1 and -1, respectively, and overwrite 'sets'
89 8 100       23 $self->{_sets}->{1} = $value->{start} if exists $value->{start};
90 8 50       20 $self->{_sets}->{-1} = $value->{end} if exists $value->{end};
91 8 50       17 $self->{_sets}->{0} = $value->{chars} if exists $value->{chars};
92             # default set
93 8 50       23 $self->{_sets}->{0} = ['a'..'z'] if !defined $self->{_sets}->{0};
94              
95 8         11 my $sets = $self->{_sets}; # shortcut
96 8         17 foreach my $set (keys %$sets)
97             {
98             return $self->{_error} =
99             "Entries in 'sets' must be ref to Math::String::Charset or ARRAY"
100             if ((ref($sets->{$set}) ne 'ARRAY') &&
101 24 50 66     68 (ref($sets->{$set}) ne 'Math::String::Charset'));
102              
103             # so for each set, make a Math::String::Charset
104             $sets->{$set} = Math::String::Charset->new($sets->{$set})
105 24 100       85 if ref($sets->{$set}) eq 'ARRAY';
106             }
107 8   33     29 $self->{_start} = $sets->{1} || $sets->{0};
108 8   66     38 $self->{_end} = $sets->{-1} || $sets->{0};
109              
110             $self->{_clen} = $self->{_start}->charlen() if
111 8 100 33     61 ((!defined $self->{_clen}) && (!defined $self->{_sep}));
112              
113             # build _ones list (cross from start/end)
114 8         15 $self->{_ones} = [];
115              
116             # _end is a simple charset, so use it's map directly
117 8         12 my $end = $self->{_end}->{_map};
118 8         11 my $o = $self->{_ones};
119 8         24 foreach ($self->{_start}->start())
120             {
121 187 100       277 push @$o, $_ if exists $end->{$_};
122             }
123             #print "\n";
124              
125             # some tests for validity
126 8 100       35 if (!defined $self->{_sep})
127             {
128 6         45 foreach (keys %{$self->{_sets}})
  6         21  
129             {
130 19         38 my $l = $self->{_sets}->{$_}->charlen();
131             return $self->{_error} =
132             "Illegal character length '$l' for charset '$_', expected '$self->{_clen}'"
133 19 50       35 if $self->{_sets}->{$_}->charlen() != $self->{_clen};
134              
135             }
136             }
137 8         11 $self->{_cnum} = Math::BigInt->new( scalar @{$self->{_ones}} );
  8         25  
138             # initialize array of counts for len of 0..1
139 8         393 $self->{_cnt} = 2; # cached amount of class-sizes
140 8 50       23 if ($self->{_minlen} <= 0)
141             {
142 8         835 $self->{_count}->[0] = 1; # '' is one string
143 8         27 my $sl = $self->{_start}->length();
144 8         52 my $el = $self->{_end}->length();
145 8         13 $self->{_count}->[1] = $self->{_cnum};
146 8         15 $self->{_count}->[2] = $sl * $el;
147             # init _sum array
148 8         25 $self->{_sum}->[0] = Math::BigInt->bzero();
149 8         143 $self->{_sum}->[1] = Math::BigInt->bone(); # '' is 1 string
150 8         187 $self->{_sum}->[2] = $self->{_count}->[1] + $self->{_sum}->[1];
151 8         406 $self->{_sum}->[3] = $self->{_count}->[2] + $self->{_sum}->[2];
152             # init set patterns
153 8         804 $self->{_spat}->[1] = [ undef, $self->{_sets}->{0} ];
154 8         28 $self->{_spat}->[2] = [ undef, $self->{_start}, $self->{_end} ];
155             }
156             else
157             {
158 0         0 $self->{_cnt} = 0; # cached amount of class-sizes
159             }
160              
161             # from _ones, make mapping name => number
162 8         24 my $i = Math::BigInt->bone();
163 8         123 foreach (@{$self->{_ones}})
  8         20  
164             {
165 73         2002 $self->{_map}->{$_} = $i++;
166             }
167              
168 8 100       167 if ($self->{_cnum}->is_zero())
169             {
170 4 50       53 $self->{_minlen} = 2 if $self->{_minlen} == 1; # no one's
171             # check whether charset can have 2-character long strings
172 4 50       270 if ($self->{_count}->[2] == 0)
173             {
174 0 0       0 $self->{_minlen} = 3 if $self->{_minlen} == 2; # no two's
175             # check whether some path from start to end set exists, if not: empty
176             }
177             }
178             return $self->{_error} =
179             "Minlen ($self->{_minlen} must be smaller than maxlen ($self->{_maxlen})"
180 8 50       67 if ($self->{_minlen} > $self->{_maxlen});
181 8         159 return $self;
182             }
183              
184             sub dump
185             {
186 1     1 0 2 my $self = shift;
187              
188 1         2 my $txt = "type: GROUPED\n";
189              
190 1         1 foreach my $set (sort { $b<=>$a } keys %{$self->{_sets}})
  2         6  
  1         5  
191             {
192 3         8 $txt .= " $set => ". $self->{_sets}->{$set}->dump(' ');
193             }
194 1         2 $txt .= "ones : " . join(' ',@{$self->{_ones}}) . "\n";
  1         4  
195 1         2 $txt;
196             }
197              
198             sub _calc
199             {
200             # given count of len 1..x, calculate count for y (y > x) and all between
201             # x and y
202             # currently re-calcs from 2 on, we could save the state and only calculate
203             # the missing counts.
204              
205             # print "calc ",caller(),"\n";
206 12     12   15 my $self = shift;
207 12 50 50     24 my $max = shift || 1; $max = 1 if $max < 1;
  12         26  
208 12 100       25 return if $max <= $self->{_cnt};
209              
210             # print "in _calc $self $max\n";
211 8         13 my $i = $self->{_cnt}; # last defined element
212 8         13 my $last = $self->{_count}->[$i];
213 8         23 while ($i++ <= $max)
214             {
215             # build list of charsets for this length
216 20         979 my $spat = []; # set patterns
217 20         27 my $sets = $self->{_sets}; # shortcut
218 20         42 for (my $j = 1; $j <= $i; $j++)
219             {
220 87         85 my $r = $j-$i-1; # reverse
221             # print "$j reversed $r (for $i)\n";
222 87   66     197 $spat->[$j] = $sets->{$j} || $sets->{$r}; # one of both?
223             $spat->[$j] = $sets->{$j}->merge($sets->{$r}) if
224 87 50 66     156 exists $sets->{$j} && exists $sets->{$r}; # both?
225 87 100       206 $spat->[$j] = $sets->{0} unless defined $spat->[$j]; # none?
226             # print $spat->[$j]->dump(),"\n";
227             }
228 20         31 $self->{_spat}->[$i] = $spat; # store
229             # for each charset, take size and mul together
230 20         62 $last = Math::BigInt->bone();
231 20         389 for (my $j = 1; $j <= $i; $j++)
232             {
233             # print "$i $spat->[$j]\n";
234 87         6182 $last *= $spat->[$j]->length();
235             # print "last $last ",$spat->[$j]->length()," ($spat->[$j])\n";
236             }
237 20         1796 $self->{_count}->[$i] = $last;
238             # print "$i: count $last ";
239 20         67 $self->{_sum}->[$i] = $self->{_sum}->[$i-1] + $self->{_count}->[$i-1];
240             # print "sum $self->{_sum}->[$i]\n";
241             }
242 8         474 $self->{_cnt} = $i-1; # store new cache size
243 8         16 return;
244             }
245              
246             sub is_valid
247             {
248             # check wether a string conforms to the given charset sets
249 15     15 1 26 my $self = shift;
250 15         14 my $str = shift;
251              
252             # print "$str\n";
253 15 50       30 return 0 if !defined $str;
254 15 100 66     38 return 1 if $str eq '' && $self->{_minlen} <= 0;
255              
256 14         35 my $int = Math::BigInt->bzero();
257 14         172 my @chars;
258 14 100       23 if (defined $self->{_sep})
259             {
260 1         10 @chars = split /$self->{_sep}/,$str;
261 1 50       5 shift @chars if $chars[0] eq '';
262 1 50       3 pop @chars if $chars[-1] eq $self->{_sep};
263             }
264             else
265             {
266 13         12 my $i = 0; my $len = CORE::length($str); my $clen = $self->{_clen};
  13         11  
  13         13  
267 13         22 while ($i < $len)
268             {
269 40         43 push @chars, substr($str,$i,$clen); $i += $clen;
  40         53  
270             }
271             }
272             # length okay?
273 14 50       34 return 0 if scalar @chars < $self->{_minlen};
274 14 50       605 return 0 if scalar @chars > $self->{_maxlen};
275              
276             # valid start char?
277 14 100       499 return 0 unless defined $self->{_start}->map($chars[0]);
278 13 100       45 return 1 if @chars == 1;
279             # further checks for strings longer than 1
280 12         14 my $k = 1;
281 12         10 my $d = scalar @chars;
282 12 100       23 $self->_calc($d) if ($self->{_cnt} < $d);
283 12         17 my $spat = $self->{_spat}->[$d];
284 12         18 foreach my $c (@chars)
285             {
286 36 100       49 return 0 if !defined $spat->[$k++]->map($c);
287             }
288             # all tests passed
289 7         33 1;
290             }
291              
292             sub minlen
293             {
294 0     0 1 0 my $self = shift;
295              
296 0         0 $self->{_minlen};
297             }
298              
299             sub maxlen
300             {
301 0     0 1 0 my $self = shift;
302              
303 0         0 $self->{_maxlen};
304             }
305              
306             sub start
307             {
308             # this returns all the starting characters in a list, or in case of a simple
309             # charset, simple the charset
310             # in scalar context, returns length of starting set, for simple charsets this
311             # equals the length
312 0     0 1 0 my $self = shift;
313              
314 0 0       0 wantarray ? @{$self->{_start}} : scalar @{$self->{_start}};
  0         0  
  0         0  
315             }
316              
317             sub end
318             {
319             # this returns all the end characters in a list, or in case of a simple
320             # charset, simple the charset
321             # in scalar context, returns length of end set, for simple charsets this
322             # equals the length
323 0     0 1 0 my $self = shift;
324              
325 0 0       0 wantarray ? sort keys %{$self->{_end}} : scalar keys %{$self->{_end}};
  0         0  
  0         0  
326             }
327              
328             sub ones
329             {
330             # this returns all the one-char strings (in scalar context the count of them)
331 0     0 1 0 my $self = shift;
332              
333 0 0       0 wantarray ? @{$self->{_ones}} : scalar @{$self->{_ones}};
  0         0  
  0         0  
334             }
335              
336             sub num2str
337             {
338             # convert Math::BigInt/Math::String to string
339             # in list context, return (string,stringlen)
340 17     17 0 763 my $self = shift;
341 17         19 my $x = shift;
342              
343 17 100       68 $x = new Math::BigInt($x) unless ref $x;
344 17 50       569 return undef if ($x->sign() !~ /^[+-]$/);
345 17 100       122 if ($x->is_zero())
346             {
347 2 100       46 return wantarray ? ('',0) : '';
348             }
349 15         148 my $j = $self->{_cnum}; # nr of chars
350              
351 15 100       59 if ($x <= $j)
352             {
353 5         177 my $c = $self->{_ones}->[$x-1];
354 5 50       899 return wantarray ? ($c,1) : $c; # string len == 1
355             }
356              
357 10         227 my $digits = $self->chars($x); my $d = $digits;
  10         10  
358             # now treat the string as it were a zero-padded string of length $digits
359              
360 10         7 my $es=""; # result
361             # copy input, make positive number, correct to $digits and cater for 0
362 10         20 my $y = Math::BigInt->new($x); $y->babs();
  10         192  
363             #print "fac $j y: $y new: ";
364 10         53 $y -= $self->{_sum}->[$digits];
365              
366 10 50       547 $self->_calc($d) if ($self->{_cnt} < $d);
367             #print "y: $y\n";
368 10 50       10 my $mod = 0; my $s = $self->{_sep}; $s = '' if !defined $s;
  10         9  
  10         16  
369 10         12 my $spat = $self->{_spat}->[$d]; # set pattern
370 10         10 my $k = $d;
371 10         15 while (!$y->is_zero())
372             {
373             #print "bfore: y/fac: $y / $j \n";
374 9         84 ($y,$mod) = $y->bdiv($spat->[$k]->length());
375             #$es = $self->{_ones}->[$mod] . $s.$es;
376 9         1177 $es = $spat->[$k--]->char($mod) . $s.$es; # find mod'th char
377             #print "after: div: $y rem: $mod \n";
378 9         154 $digits --; # one digit done
379             }
380             # padd the remaining digits with the zero-symbol
381 10         90 while ($digits-- > 0)
382             {
383 16         31 $es = $spat->[$k--]->char(0) . $s . $es;
384             }
385 10         48 $es =~ s/$s$//; # strip last sep 'char'
386 10 50       64 wantarray ? ($es,$d) : $es;
387             }
388              
389             sub str2num
390             {
391             # convert Math::String to Math::BigInt
392 24     24 0 3826 my $self = shift;
393 24         27 my $str = shift; # simple string
394              
395 24         71 my $int = Math::BigInt->bzero();
396 24         396 my $i = CORE::length($str);
397              
398 24 100       58 return $int if $i == 0;
399             # print "str2num $i $clen '$str'\n";
400 22         31 my $map = $self->{_map};
401 22         42 my $clen = $self->{_clen}; # len of one char
402              
403 22 100 100     114 if ((!defined $self->{_sep}) && ($i == $clen))
404             {
405 5 50       14 return $int->bnan() if !exists $map->{$str};
406 5         20 return $map->{$str}->copy();
407             }
408              
409 17         41 my $mul = Math::BigInt->bone();
410 17         252 my $cs; # charset at pos i
411 17         39 my $k = 1; # position
412 17         15 my $c = 0; # chars in string
413 17 100       35 if (!defined $self->{_sep})
414             {
415 13 50       28 return $int->bnan() if $i % $clen != 0; # not multiple times clen
416 13         27 $c = int($i/$clen);
417 13 100       31 $self->_calc($c) if ($self->{_cnt} < $c);
418 13         17 my $spat = $self->{_spat}->[$c];
419             # print "$c ($self->{_cnt}) spat: ",scalar @$spat,"\n";
420 13         12 $i -= $clen;
421 13         12 $k = $c;
422 13         31 while ($i >= 0)
423             {
424 52         63 $cs = $spat->[$k--]; # charset at pos k
425             # print "$i $k $cs nr $int ";
426             # print "mapped ",substr($str,$i,$clen)," => ",
427             # $cs->map(substr($str,$i,$clen)) || 0;
428             # print " mul $mul => ";
429 52         155 $int += $mul * $cs->map(substr($str,$i,$clen));
430 52         6983 $mul *= $cs->length();
431             # print "mul $mul\n";
432 52         4112 $i -= $clen;
433             }
434             }
435             else
436             {
437             # with sep char
438 4         21 my @chars = split /$self->{_sep}/, $str;
439 4 50       7 shift @chars if $chars[0] eq ''; # strip leading sep
440 4 50       8 pop @chars if $chars[-1] eq $self->{_sep}; # strip trailing sep
441 4         3 $c = scalar @chars;
442 4 50       8 $self->_calc($c) if ($self->{_cnt} < $c);
443 4         4 my $spat = $self->{_spat}->[$c];
444 4         2 $k = $c;
445 4         7 foreach (reverse @chars)
446             {
447 8         273 $cs = $spat->[$k--]; # charset at pos k
448 8         19 $int += $mul * $cs->map($_);
449 8         1034 $mul *= $cs->length();
450             }
451             }
452 17         358 $int + $self->{_sum}->[$c]; # add base sum
453             }
454              
455             #sub char
456             # {
457             # # return nth char from charset
458             # my $self = shift;
459             # my $char = shift || 0;
460             #
461             # return undef if $char > scalar @{$self->{_ones}}; # dont create spurios elems
462             # return $self->{_ones}->[$char];
463             # }
464              
465             sub first
466             {
467 4     4 1 5 my $self = shift;
468 4   100     13 my $count = abs(shift || 0);
469              
470 4 50       13 return if $count < $self->{_minlen};
471 4 50 33     217 return if defined $self->{_maxlen} && $count > $self->{_maxlen};
472 4 100       180 return '' if $count == 0;
473              
474 3 100       8 return $self->{_ones}->[0] if $count == 1;
475              
476 2         7 $self->_calc($count);
477 2         3 my $spat = $self->{_spat}->[$count];
478 2         3 my $es = '';
479 2   50     7 my $s = $self->{_sep} || '';
480 2         5 for (my $i = 1; $i <= $count; $i++)
481             {
482 5         10 $es .= $s . $spat->[$i]->char(0);
483             }
484 2         9 $s = quotemeta($s);
485 2 50       4 $es =~ s/^$s// if $s ne ''; # remove first sep
486 2         5 $es;
487             }
488              
489             sub last
490             {
491 4     4 1 6 my $self = shift;
492 4   100     12 my $count = abs(shift || 0);
493              
494 4 50       10 return if $count < $self->{_minlen};
495 4 50 33     217 return if defined $self->{_maxlen} && $count > $self->{_maxlen};
496 4 100       175 return '' if $count == 0;
497              
498 3 100       9 return $self->{_ones}->[-1] if $count == 1;
499              
500 2         4 $self->_calc($count);
501 2         2 my $spat = $self->{_spat}->[$count];
502 2         3 my $es = '';
503 2   50     6 my $s = $self->{_sep} || '';
504 2         8 for (my $i = 1; $i <= $count; $i++)
505             {
506 5         10 $es .= $s . $spat->[$i]->char(-1);
507             }
508 2         4 $s = quotemeta($s);
509 2 50       4 $es =~ s/^$s// if $s ne ''; # remove first sep
510 2         6 $es;
511             }
512              
513             sub next
514             {
515 1     1 1 2 my $self = shift;
516 1         2 my $str = shift;
517              
518 1 50       4 if ($str->{_cache} eq '') # 0 => 1
519             {
520 0 0       0 my $min = $self->{_minlen}; $min = 1 if $min <= 0;
  0         0  
521 0         0 $str->{_cache} = $self->first($min);
522 0         0 return;
523             }
524              
525             # only the rightmost digit is adjusted. If this overflows, we simple
526             # invalidate the cache. The time saved by updating the cache would be to
527             # small to be of use, especially since updating the cache takes more time
528             # then. Also, if the cached isn't used later, we would have spent the
529             # update-time in vain.
530              
531             # for higher orders not ready yet
532 1         2 $str->{_cache} = undef;
533             }
534              
535             sub prev
536             {
537 1     1 1 2 my $self = shift;
538 1         2 my $str = shift;
539              
540 1 50       3 if ($str->{_cache} eq '') # 0 => -1
541             {
542 0 0       0 my $min = $self->{_minlen}; $min = -1 if $min >= 0;
  0         0  
543 0         0 $str->{_cache} = $self->first($min);
544 0         0 return;
545             }
546              
547             # for higher orders not ready yet
548 1         3 $str->{_cache} = undef;
549             }
550              
551             __END__