File Coverage

lib/Math/String/Charset/Grouped.pm
Criterion Covered Total %
statement 238 264 90.1
branch 90 140 64.2
condition 25 40 62.5
subroutine 16 21 76.1
pod 10 13 76.9
total 379 478 79.2


line stmt bran cond sub pod time code
1             #############################################################################
2             # Math/String/Charset/Grouped.pm -- a charset of charsets for Math/String
3             #
4             # Copyright (C) 1999-2003 by Tels. All rights reserved.
5             #############################################################################
6              
7             package Math::String::Charset::Grouped;
8 6     6   34 use base Math::String::Charset;
  6         10  
  6         626  
9              
10 6     6   33 use vars qw($VERSION);
  6         14  
  6         564  
11             $VERSION = '0.06'; # Current version of this package
12             require 5.005; # requires this Perl version or later
13              
14 6     6   31 use strict;
  6         13  
  6         247  
15 6     6   29 use Math::BigInt;
  6         11  
  6         47  
16              
17 6     6   2435 use vars qw/$die_on_error/;
  6         9  
  6         22180  
18             $die_on_error = 1; # set to 0 to not die
19              
20             # following hash values are used:
21             # _clen : length of one character (all chars must have same len unless sep)
22             # _ones : list of one-character strings (cross of _end and _start)
23             # _start : contains array of all valid start characters
24             # _end : contains hash (for easier lookup) of all valid end characters
25             # _order : = 1
26             # _type : = 1
27             # _error : error message or ""
28             # _count : array of count of different strings with length x
29             # _sum : array of starting number for strings with length x
30             # _sum[x] = _sum[x-1]+_count[x-1]
31             # _cnt : number of elements in _count and _sum (as well as in _scnt & _ssum)
32             # _cnum : number of characters in _ones as BigInt (for speed)
33             # _minlen: minimum string length (anything shorter is invalid), default -inf
34             # _maxlen: maximum string length (anything longer is invalid), default +inf
35             # _scale : optional input/output scale
36              
37             # simple ones:
38             # _sep : separator string (undef for none)
39             # _map : mapping character to number
40              
41             # higher orders:
42             # _bi : hash with refs to array of bi-grams
43             # _bmap : hash with refs to hash of bi-grams
44             # _scnt : array of hashes, count of strings starting with this character
45              
46             # grouped:
47             # _spat : array with pattern of charsets 8for each stirnglen one ARRAY ref)
48              
49             #############################################################################
50             # private, initialize self
51              
52             sub _strict_check
53             {
54             # a per class check, to be overwritten by subclasses
55 8     8   12 my $self = shift;
56 8         10 my $value = shift;
57              
58 8         16 my $class = ref($self);
59 8 50       22 return $self->{_error} = "Wrong type '$self->{_type}' for $class"
60             if $self->{_type} != 1;
61 8 50       20 return $self->{_error} = "Wrong order'$self->{_order}' for $class"
62             if $self->{_order} != 1;
63 8         26 foreach my $key (keys %$value)
64             {
65 12 50       70 return $self->{_error} = "Illegal parameter '$key' for $class"
66             if $key !~ /^(start|minlen|maxlen|sep|sets|end|charlen|scale)$/;
67             }
68             }
69              
70             sub _initialize
71             {
72             # set yourself to the value represented by the given string
73 8     8   15 my $self = shift;
74 8         9 my $value = shift;
75              
76 8         26 $self->{_clen} = $value->{charlen};
77 8         13 $self->{_sep} = $value->{sep}; # separator char
78              
79 8 50       28 return $self->{_error} = "Need HASH ref as 'sets'"
80             if (ref($value->{sets}) ne 'HASH');
81            
82             # make copy at same time
83 8         11 foreach my $key (keys %{$value->{sets}})
  8         30  
84             {
85 23         63 $self->{_sets}->{$key} = $value->{sets}->{$key};
86             }
87              
88             # start/end are sets 1 and -1, respectively, and overwrite 'sets'
89 8 100       29 $self->{_sets}->{1} = $value->{start} if exists $value->{start};
90 8 50       20 $self->{_sets}->{-1} = $value->{end} if exists $value->{end};
91 8 50       20 $self->{_sets}->{0} = $value->{chars} if exists $value->{chars};
92             # default set
93 8 50       25 $self->{_sets}->{0} = ['a'..'z'] if !defined $self->{_sets}->{0};
94            
95 8         14 my $sets = $self->{_sets}; # shortcut
96 8         29 foreach my $set (keys %$sets)
97             {
98 24 50 66     97 return $self->{_error} =
99             "Entries in 'sets' must be ref to Math::String::Charset or ARRAY"
100             if ((ref($sets->{$set}) ne 'ARRAY') &&
101             (ref($sets->{$set}) ne 'Math::String::Charset'));
102            
103             # so for each set, make a Math::String::Charset
104 24 100       115 $sets->{$set} = Math::String::Charset->new($sets->{$set})
105             if ref($sets->{$set}) eq 'ARRAY';
106             }
107 8   33     35 $self->{_start} = $sets->{1} || $sets->{0};
108 8   66     35 $self->{_end} = $sets->{-1} || $sets->{0};
109            
110 8 100 33     60 $self->{_clen} = $self->{_start}->charlen() if
111             ((!defined $self->{_clen}) && (!defined $self->{_sep}));
112              
113             # build _ones list (cross from start/end)
114 8         17 $self->{_ones} = [];
115            
116             # _end is a simple charset, so use it's map directly
117 8         24 my $end = $self->{_end}->{_map};
118 8         17 my $o = $self->{_ones};
119 8         57 foreach ($self->{_start}->start())
120             {
121 187 100       414 push @$o, $_ if exists $end->{$_};
122             }
123             #print "\n";
124              
125             # some tests for validity
126 8 100       39 if (!defined $self->{_sep})
127             {
128 6         43 foreach (keys %{$self->{_sets}})
  6         22  
129             {
130 19         52 my $l = $self->{_sets}->{$_}->charlen();
131 19 50       52 return $self->{_error} =
132             "Illegal character length '$l' for charset '$_', expected '$self->{_clen}'"
133             if $self->{_sets}->{$_}->charlen() != $self->{_clen};
134              
135             }
136             }
137 8         18 $self->{_cnum} = Math::BigInt->new( scalar @{$self->{_ones}} );
  8         32  
138             # initialize array of counts for len of 0..1
139 8         407 $self->{_cnt} = 2; # cached amount of class-sizes
140 8 50       26 if ($self->{_minlen} <= 0)
141             {
142 8         907 $self->{_count}->[0] = 1; # '' is one string
143 8         30 my $sl = $self->{_start}->length();
144 8         24 my $el = $self->{_end}->length();
145 8         20 $self->{_count}->[1] = $self->{_cnum};
146 8         18 $self->{_count}->[2] = $sl * $el;
147             # init _sum array
148 8         35 $self->{_sum}->[0] = Math::BigInt->bzero();
149 8         187 $self->{_sum}->[1] = Math::BigInt->bone(); # '' is 1 string
150 8         209 $self->{_sum}->[2] = $self->{_count}->[1] + $self->{_sum}->[1];
151 8         562 $self->{_sum}->[3] = $self->{_count}->[2] + $self->{_sum}->[2];
152             # init set patterns
153 8         1050 $self->{_spat}->[1] = [ undef, $self->{_sets}->{0} ];
154 8         30 $self->{_spat}->[2] = [ undef, $self->{_start}, $self->{_end} ];
155             }
156             else
157             {
158 0         0 $self->{_cnt} = 0; # cached amount of class-sizes
159             }
160              
161             # from _ones, make mapping name => number
162 8         26 my $i = Math::BigInt->bone();
163 8         158 foreach (@{$self->{_ones}})
  8         23  
164             {
165 73         3435 $self->{_map}->{$_} = $i++;
166             }
167            
168 8 100       224 if ($self->{_cnum}->is_zero())
169             {
170 4 50       69 $self->{_minlen} = 2 if $self->{_minlen} == 1; # no one's
171             # check whether charset can have 2-character long strings
172 4 50       286 if ($self->{_count}->[2] == 0)
173             {
174 0 0       0 $self->{_minlen} = 3 if $self->{_minlen} == 2; # no two's
175             # check whether some path from start to end set exists, if not: empty
176             }
177             }
178 8 50       82 return $self->{_error} =
179             "Minlen ($self->{_minlen} must be smaller than maxlen ($self->{_maxlen})"
180             if ($self->{_minlen} > $self->{_maxlen});
181 8         164 return $self;
182             }
183              
184             sub dump
185             {
186 1     1 0 2 my $self = shift;
187            
188 1         2 my $txt = "type: GROUPED\n";
189              
190 1         2 foreach my $set (keys %{$self->{_sets}})
  1         4  
191             {
192 3         11 $txt .= " $set => ". $self->{_sets}->{$set}->dump(' ');
193             }
194 1         3 $txt .= "ones : " . join(' ',@{$self->{_ones}}) . "\n";
  1         3  
195 1         3 $txt;
196             }
197              
198             sub _calc
199             {
200             # given count of len 1..x, calculate count for y (y > x) and all between
201             # x and y
202             # currently re-calcs from 2 on, we could save the state and only calculate
203             # the missing counts.
204              
205             # print "calc ",caller(),"\n";
206 12     12   20 my $self = shift;
207 12 50 50     29 my $max = shift || 1; $max = 1 if $max < 1;
  12         30  
208 12 100       28 return if $max <= $self->{_cnt};
209              
210             # print "in _calc $self $max\n";
211 8         14 my $i = $self->{_cnt}; # last defined element
212 8         15 my $last = $self->{_count}->[$i];
213 8         23 while ($i++ <= $max)
214             {
215             # build list of charsets for this length
216 20         1166 my $spat = []; # set patterns
217 20         30 my $sets = $self->{_sets}; # shortcut
218 20         45 for (my $j = 1; $j <= $i; $j++)
219             {
220 87         109 my $r = $j-$i-1; # reverse
221             # print "$j reversed $r (for $i)\n";
222 87   100     314 $spat->[$j] = $sets->{$j} || $sets->{$r}; # one of both?
223 87 50 66     226 $spat->[$j] = $sets->{$j}->merge($sets->{$r}) if
224             exists $sets->{$j} && exists $sets->{$r}; # both?
225 87 100       280 $spat->[$j] = $sets->{0} unless defined $spat->[$j]; # none?
226             # print $spat->[$j]->dump(),"\n";
227             }
228 20         40 $self->{_spat}->[$i] = $spat; # store
229             # for each charset, take size and mul together
230 20         64 $last = Math::BigInt->bone();
231 20         427 for (my $j = 1; $j <= $i; $j++)
232             {
233             # print "$i $spat->[$j]\n";
234 87         7309 $last *= $spat->[$j]->length();
235             # print "last $last ",$spat->[$j]->length()," ($spat->[$j])\n";
236             }
237 20         2445 $self->{_count}->[$i] = $last;
238             # print "$i: count $last ";
239 20         69 $self->{_sum}->[$i] = $self->{_sum}->[$i-1] + $self->{_count}->[$i-1];
240             # print "sum $self->{_sum}->[$i]\n";
241             }
242 8         534 $self->{_cnt} = $i-1; # store new cache size
243 8         18 return;
244             }
245              
246             sub is_valid
247             {
248             # check wether a string conforms to the given charset sets
249 15     15 1 37 my $self = shift;
250 15         22 my $str = shift;
251              
252             # print "$str\n";
253 15 50       34 return 0 if !defined $str;
254 15 100 66     46 return 1 if $str eq '' && $self->{_minlen} <= 0;
255              
256 14         53 my $int = Math::BigInt->bzero();
257 14         276 my @chars;
258 14 100       34 if (defined $self->{_sep})
259             {
260 1         17 @chars = split /$self->{_sep}/,$str;
261 1 50       6 shift @chars if $chars[0] eq '';
262 1 50       5 pop @chars if $chars[-1] eq $self->{_sep};
263             }
264             else
265             {
266 13         15 my $i = 0; my $len = CORE::length($str); my $clen = $self->{_clen};
  13         22  
  13         23  
267 13         29 while ($i < $len)
268             {
269 40         66 push @chars, substr($str,$i,$clen); $i += $clen;
  40         76  
270             }
271             }
272             # length okay?
273 14 50       45 return 0 if scalar @chars < $self->{_minlen};
274 14 50       1149 return 0 if scalar @chars > $self->{_maxlen};
275              
276             # valid start char?
277 14 100       1065 return 0 unless defined $self->{_start}->map($chars[0]);
278 13 100       38 return 1 if @chars == 1;
279             # further checks for strings longer than 1
280 12         16 my $k = 1;
281 12         16 my $d = scalar @chars;
282 12 100       34 $self->_calc($d) if ($self->{_cnt} < $d);
283 12         20 my $spat = $self->{_spat}->[$d];
284 12         22 foreach my $c (@chars)
285             {
286 36 100       97 return 0 if !defined $spat->[$k++]->map($c);
287             }
288             # all tests passed
289 7         61 1;
290             }
291              
292             sub minlen
293             {
294 0     0 1 0 my $self = shift;
295              
296 0         0 $self->{_minlen};
297             }
298              
299             sub maxlen
300             {
301 0     0 1 0 my $self = shift;
302              
303 0         0 $self->{_maxlen};
304             }
305              
306             sub start
307             {
308             # this returns all the starting characters in a list, or in case of a simple
309             # charset, simple the charset
310             # in scalar context, returns length of starting set, for simple charsets this
311             # equals the length
312 0     0 1 0 my $self = shift;
313              
314 0 0       0 wantarray ? @{$self->{_start}} : scalar @{$self->{_start}};
  0         0  
  0         0  
315             }
316            
317             sub end
318             {
319             # this returns all the end characters in a list, or in case of a simple
320             # charset, simple the charset
321             # in scalar context, returns length of end set, for simple charsets this
322             # equals the length
323 0     0 1 0 my $self = shift;
324              
325 0 0       0 wantarray ? sort keys %{$self->{_end}} : scalar keys %{$self->{_end}};
  0         0  
  0         0  
326             }
327              
328             sub ones
329             {
330             # this returns all the one-char strings (in scalar context the count of them)
331 0     0 1 0 my $self = shift;
332              
333 0 0       0 wantarray ? @{$self->{_ones}} : scalar @{$self->{_ones}};
  0         0  
  0         0  
334             }
335              
336             sub num2str
337             {
338             # convert Math::BigInt/Math::String to string
339             # in list context, return (string,stringlen)
340 17     17 0 2531 my $self = shift;
341 17         26 my $x = shift;
342              
343 17 100       168 $x = new Math::BigInt($x) unless ref $x;
344 17 50       555 return undef if ($x->sign() !~ /^[+-]$/);
345 17 100       157 if ($x->is_zero())
346             {
347 2 100       35 return wantarray ? ('',0) : '';
348             }
349 15         210 my $j = $self->{_cnum}; # nr of chars
350              
351 15 100       52 if ($x <= $j)
352             {
353 5         140 my $c = $self->{_ones}->[$x-1];
354 5 50       953 return wantarray ? ($c,1) : $c; # string len == 1
355             }
356              
357 10         291 my $digits = $self->chars($x); my $d = $digits;
  10         15  
358             # now treat the string as it were a zero-padded string of length $digits
359            
360 10         15 my $es=""; # result
361             # copy input, make positive number, correct to $digits and cater for 0
362 10         32 my $y = Math::BigInt->new($x); $y->babs();
  10         233  
363             #print "fac $j y: $y new: ";
364 10         87 $y -= $self->{_sum}->[$digits];
365            
366 10 50       952 $self->_calc($d) if ($self->{_cnt} < $d);
367             #print "y: $y\n";
368 10 50       13 my $mod = 0; my $s = $self->{_sep}; $s = '' if !defined $s;
  10         16  
  10         28  
369 10         17 my $spat = $self->{_spat}->[$d]; # set pattern
370 10         14 my $k = $d;
371 10         27 while (!$y->is_zero())
372             {
373             #print "bfore: y/fac: $y / $j \n";
374 9         135 ($y,$mod) = $y->bdiv($spat->[$k]->length());
375             #$es = $self->{_ones}->[$mod] . $s.$es;
376 9         2156 $es = $spat->[$k--]->char($mod) . $s.$es; # find mod'th char
377             #print "after: div: $y rem: $mod \n";
378 9         157 $digits --; # one digit done
379             }
380             # padd the remaining digits with the zero-symbol
381 10         137 while ($digits-- > 0)
382             {
383 16         61 $es = $spat->[$k--]->char(0) . $s . $es;
384             }
385 10         109 $es =~ s/$s$//; # strip last sep 'char'
386 10 50       93 wantarray ? ($es,$d) : $es;
387             }
388              
389             sub str2num
390             {
391             # convert Math::String to Math::BigInt
392 24     24 0 23702 my $self = shift;
393 24         33 my $str = shift; # simple string
394              
395 24         75 my $int = Math::BigInt->bzero();
396 24         497 my $i = CORE::length($str);
397              
398 24 100       64 return $int if $i == 0;
399             # print "str2num $i $clen '$str'\n";
400 22         38 my $map = $self->{_map};
401 22         35 my $clen = $self->{_clen}; # len of one char
402              
403 22 100 100     104 if ((!defined $self->{_sep}) && ($i == $clen))
404             {
405 5 50       12 return $int->bnan() if !exists $map->{$str};
406 5         18 return $map->{$str}->copy();
407             }
408              
409 17         70 my $mul = Math::BigInt->bone();
410 17         338 my $cs; # charset at pos i
411 17         22 my $k = 1; # position
412 17         19 my $c = 0; # chars in string
413 17 100       44 if (!defined $self->{_sep})
414             {
415 13 50       48 return $int->bnan() if $i % $clen != 0; # not multiple times clen
416 13         28 $c = int($i/$clen);
417 13 100       36 $self->_calc($c) if ($self->{_cnt} < $c);
418 13         25 my $spat = $self->{_spat}->[$c];
419             # print "$c ($self->{_cnt}) spat: ",scalar @$spat,"\n";
420 13         14 $i -= $clen;
421 13         16 $k = $c;
422 13         43 while ($i >= 0)
423             {
424 52         76 $cs = $spat->[$k--]; # charset at pos k
425             # print "$i $k $cs nr $int ";
426             # print "mapped ",substr($str,$i,$clen)," => ",
427             # $cs->map(substr($str,$i,$clen)) || 0;
428             # print " mul $mul => ";
429 52         180 $int += $mul * $cs->map(substr($str,$i,$clen));
430 52         10481 $mul *= $cs->length();
431             # print "mul $mul\n";
432 52         6079 $i -= $clen;
433             }
434             }
435             else
436             {
437             # with sep char
438 4         32 my @chars = split /$self->{_sep}/, $str;
439 4 50       11 shift @chars if $chars[0] eq ''; # strip leading sep
440 4 50       11 pop @chars if $chars[-1] eq $self->{_sep}; # strip trailing sep
441 4         6 $c = scalar @chars;
442 4 50       9 $self->_calc($c) if ($self->{_cnt} < $c);
443 4         7 my $spat = $self->{_spat}->[$c];
444 4         5 $k = $c;
445 4         7 foreach (reverse @chars)
446             {
447 8         452 $cs = $spat->[$k--]; # charset at pos k
448 8         23 $int += $mul * $cs->map($_);
449 8         6194 $mul *= $cs->length();
450             }
451             }
452 17         1382 $int + $self->{_sum}->[$c]; # add base sum
453             }
454              
455             #sub char
456             # {
457             # # return nth char from charset
458             # my $self = shift;
459             # my $char = shift || 0;
460             #
461             # return undef if $char > scalar @{$self->{_ones}}; # dont create spurios elems
462             # return $self->{_ones}->[$char];
463             # }
464              
465             sub first
466             {
467 4     4 1 7 my $self = shift;
468 4   100     15 my $count = abs(shift || 0);
469              
470 4 50       14 return if $count < $self->{_minlen};
471 4 50 33     397 return if defined $self->{_maxlen} && $count > $self->{_maxlen};
472 4 100       360 return '' if $count == 0;
473              
474 3 100       12 return $self->{_ones}->[0] if $count == 1;
475              
476 2         7 $self->_calc($count);
477 2         4 my $spat = $self->{_spat}->[$count];
478 2         4 my $es = '';
479 2   50     11 my $s = $self->{_sep} || '';
480 2         6 for (my $i = 1; $i <= $count; $i++)
481             {
482 5         16 $es .= $s . $spat->[$i]->char(0);
483             }
484 2         6 $s = quotemeta($s);
485 2 50       5 $es =~ s/^$s// if $s ne ''; # remove first sep
486 2         8 $es;
487             }
488              
489             sub last
490             {
491 4     4 1 8 my $self = shift;
492 4   100     14 my $count = abs(shift || 0);
493              
494 4 50       14 return if $count < $self->{_minlen};
495 4 50 33     369 return if defined $self->{_maxlen} && $count > $self->{_maxlen};
496 4 100       349 return '' if $count == 0;
497              
498 3 100       13 return $self->{_ones}->[-1] if $count == 1;
499            
500 2         5 $self->_calc($count);
501 2         5 my $spat = $self->{_spat}->[$count];
502 2         3 my $es = '';
503 2   50     9 my $s = $self->{_sep} || '';
504 2         14 for (my $i = 1; $i <= $count; $i++)
505             {
506 5         14 $es .= $s . $spat->[$i]->char(-1);
507             }
508 2         4 $s = quotemeta($s);
509 2 50       6 $es =~ s/^$s// if $s ne ''; # remove first sep
510 2         7 $es;
511             }
512              
513             sub next
514             {
515 1     1 1 3 my $self = shift;
516 1         4 my $str = shift;
517              
518 1 50       5 if ($str->{_cache} eq '') # 0 => 1
519             {
520 0 0       0 my $min = $self->{_minlen}; $min = 1 if $min <= 0;
  0         0  
521 0         0 $str->{_cache} = $self->first($min);
522 0         0 return;
523             }
524              
525             # only the rightmost digit is adjusted. If this overflows, we simple
526             # invalidate the cache. The time saved by updating the cache would be to
527             # small to be of use, especially since updating the cache takes more time
528             # then. Also, if the cached isn't used later, we would have spent the
529             # update-time in vain.
530              
531             # for higher orders not ready yet
532 1         3 $str->{_cache} = undef;
533             }
534              
535             sub prev
536             {
537 1     1 1 3 my $self = shift;
538 1         4 my $str = shift;
539              
540 1 50       7 if ($str->{_cache} eq '') # 0 => -1
541             {
542 0 0       0 my $min = $self->{_minlen}; $min = -1 if $min >= 0;
  0         0  
543 0         0 $str->{_cache} = $self->first($min);
544 0         0 return;
545             }
546              
547             # for higher orders not ready yet
548 1         4 $str->{_cache} = undef;
549             }
550              
551             __END__