File Coverage

blib/lib/Unicode/Digits.pm
Criterion Covered Total %
statement 37 37 100.0
branch 16 20 80.0
condition 14 15 93.3
subroutine 7 7 100.0
pod 1 1 100.0
total 75 80 93.7


line stmt bran cond sub pod time code
1             package Unicode::Digits;
2              
3 2     2   42584 use warnings;
  2         4  
  2         57  
4 2     2   10 use strict;
  2         4  
  2         76  
5              
6 2     2   12 use Carp;
  2         7  
  2         177  
7 2     2   2871 use Unicode::UCD qw/charinfo/;
  2         440268  
  2         225  
8 2     2   24 use Exporter qw/import/;
  2         6  
  2         1303  
9              
10             our @EXPORT_OK = qw(digits_to_int);
11              
12             =head1 NAME
13              
14             Unicode::Digits - Convert UNICODE digits to integers you can do math with
15              
16             =head1 VERSION
17              
18             Version 20090607
19              
20             =cut
21              
22             our $VERSION = '20090607';
23              
24             =head1 SYNOPSIS
25              
26             So, you have matched a string with C<\d> and now want to do some math.
27             What is that you say? The number your captured plus 5 is 5? Oh, that
28             is right \d now matches UNICODE digits not [0-9]. What to do? Well,
29             You can just call C and all of your troubles* are over!
30              
31             use Unicode::Digits qw/digits_to_int/;
32              
33             my $string = "forty-two in Mongolian is \x{1814}\x{1812}";
34             my $num = digits_to_int $string =~ /(\d+)/;
35             print $num + 5, "\n";
36              
37             =head1 FUNCTIONS
38              
39             =head2 digits_to_int(STRING)
40              
41             The digits_to_int function transliterates a string of UNICODE digit
42             characters to a number you can do math with, non-digit characters are
43             passed through, so C<"42 is \x{1814}\x{1812}"> becomes C<"42 is 42">.
44              
45             =head2 digits_to_int(STRING, ERRORHANDLING)
46              
47             You can optionally pass an argument that controls what happens when
48             the source string contains non-digit characters or characters from
49             different sets of digits. ERRORHANDLING can be one of C<"strict">,
50             C<"loose">, C<"looser">, or C<"loosest">. Their behaviours are as
51             follows:
52              
53             =over
54              
55             =item strict
56              
57             All of the characters must be digit characters and they must all come
58             from the same range (so no mixing Monglian digits with Arabic-Indic
59             digits) or the function will die.
60              
61             =item loose
62              
63             All of the characters must be digit characters or it will die.
64             If there are characters from different ranges you will get a warning.
65              
66             =item looser
67              
68             If there are any non digit characters, or the characters are from
69             different ranges, you will get a warning.
70              
71             =item loosest
72              
73             This is the default mode, all non-digit characters are passed through
74             witout warning, and the digits do not have to come from the same range.
75              
76             =back
77              
78             =cut
79              
80             sub _find_zero($) {
81 38     38   60 my $ord = ord shift;
82 38         92 return $ord - charinfo($ord)->{digit};
83             }
84              
85             sub digits_to_int {
86 20 50 66 20 1 3029 croak "wrong number of arguments" unless @_ == 1 or @_ == 2;
87 20         40 my ($string, $mode) = @_;
88 20 100       50 $mode = "loosest" unless defined $mode;
89              
90 20 50       113 croak "ERRORHANDLING must be strict, loose, looser, or loosest not '$mode'"
91             unless $mode =~ /^(?:strict|loose(?:r|st)?)$/;
92              
93 20 100 100     380 croak "string '$string' contains non-digit characters"
94             if $mode =~ '^(?:strict|loose)$' and $string =~ /\D/;
95              
96 18 100 100     183 carp "string '$string' contains non-digit characters"
97             if $mode eq "looser" and $string =~ /\D/;
98              
99 18         26 my $num;
100 18         67 my ($first_num) = $string =~ /(\d)/;
101 18 50       42 return $string unless defined $first_num;
102              
103 18         34 my $zero = _find_zero $first_num;
104              
105 18         272281 for my $d (split //, $string) {
106 51 100       151 if ($d =~ /\D/) {
107 15         19 $num .= $d;
108 15         17 next;
109             }
110              
111 36         95 my $info = charinfo ord $d;
112              
113 36 100 100     11616 croak "string '$string' contains digits from different ranges"
114             if $mode eq 'strict' and $zero != _find_zero $d;
115              
116 35 100 100     1632 carp "string '$string' contains digits from different ranges"
117             if $mode =~ /^looser?$/ and $zero != _find_zero $d;
118              
119 35 50       4702 die sprintf "U+%x claims to be a digit, but doesn't have a digit number", ord $d
120             unless $info->{digit} =~ /[0-9]/;
121              
122 35         163 $num .= $info->{digit};
123             }
124 17         91 return $num;
125             }
126              
127             =head1 AUTHOR
128              
129             Chas. J. Owens IV, C<< >>
130              
131             =head1 DIAGNOSTICS
132              
133             =over
134              
135             =item "wrong number of arguments"
136              
137             C takes one or two arguments,
138             if you have more than two or no arguments you will recieve this error.
139              
140             =item "ERRORHANDLING must be strict, loose, looser, or loosest not '%s'"
141              
142             If you pass a second argument that is not strict, loose, looser,
143             or loosest to C, you will
144             recieve this error.
145              
146             =item "string '%s' contains non-digit characters"
147              
148             You will recieve this message as a warning or error (depending on what
149             mode you chose), if the string has characters that do not have the
150             UNICODE digit property.
151              
152             =item "string '$s' contains digits from different ranges"
153              
154             You will recieve this message as a warning or error (depending on what
155             mode you chose), if the string has characters that are not part of the
156             same range of digit characters.
157              
158             =item "U+%x claims to be a digit, but doesn't have a digit number"
159              
160             This error is unlikely to occur, if it does then the bug is either with
161             my code (the likely scenario) or C (not very likely).
162              
163             =back
164              
165             =head1 BUGS
166              
167             My understanding of UNICODE is flawed, therefore, I have undoubtly done
168             something wrong. For instance, what should be done with "5\x{0308}"?
169             Also, there is a bunch of stuff relating to surrogates I don't understand.
170              
171             =head1 SUPPORT
172              
173             You can find documentation for this module with the perldoc command.
174              
175             perldoc Unicode::Digits
176              
177             =head1 COPYRIGHT & LICENSE
178              
179             Copyright 2009 Chas. J. Owens IV, all rights reserved.
180              
181             This program is free software; you can redistribute it and/or modify it
182             under the same terms as Perl itself.
183              
184             =cut
185              
186             "this is not an interesting return value";