File Coverage

blib/lib/Lingua/RO/Numbers.pm
Criterion Covered Total %
statement 142 154 92.2
branch 93 110 84.5
condition 24 36 66.6
subroutine 12 12 100.0
pod 3 3 100.0
total 274 315 86.9


line stmt bran cond sub pod time code
1             package Lingua::RO::Numbers;
2              
3             #
4             ## See: http://ro.wikipedia.org/wiki/Sistem_zecimal#Denumiri_ale_numerelor
5             #
6              
7 3     3   66847 use utf8;
  3         15  
  3         17  
8 3     3   89 use strict;
  3         3  
  3         95  
9 3     3   24 use warnings;
  3         8  
  3         2140  
10              
11             require Exporter;
12             our @ISA = qw(Exporter);
13             our @EXPORT_OK = qw(number_to_ro ro_to_number);
14              
15             =encoding utf8
16              
17             =head1 NAME
18              
19             Lingua::RO::Numbers - Converts numeric values into their Romanian string equivalents
20              
21             =head1 VERSION
22              
23             Version 0.20
24              
25             =cut
26              
27             our $VERSION = '0.20';
28              
29             # Numbers => text
30             our %DIGITS;
31             @DIGITS{'0' .. '19'} = qw(
32             zero unu doi trei patru cinci șase șapte opt nouă zece
33             unsprezece
34             doisprezece
35             treisprezece
36             paisprezece
37             cincisprezece
38             șaisprezece
39             șaptesprezece
40             optsprezece
41             nouăsprezece
42             );
43              
44             # Text => numbers
45             our %WORDS;
46             @WORDS{map { _remove_diacritics($_) } values %DIGITS} = keys %DIGITS;
47             @WORDS{qw(o un doua cin sai ob)} = (1, 1, 2, 5, 6, 8);
48              
49             # Colocvial
50             @WORDS{qw(unspe doispe treispe paispe cinspe cinsprezece saispe saptespe saptuspe optspe nouaspe)} =
51             (11, 12, 13, 14, 15, 15, 16, 17, 17, 18, 19);
52              
53             # This array contains number greater than 1000 and it's used to convert numbers into text
54             our @BIGNUMS = (
55             {num => 10**2, sg => 'suta', pl => 'sute', fem => 1},
56             {num => 10**3, sg => 'mie', pl => 'mii', fem => 1},
57             {num => 10**6, sg => 'milion', pl => 'milioane'},
58             {num => 10**9, sg => 'miliard', pl => 'miliarde'},
59             {num => 10**12, sg => 'bilion', pl => 'bilioane'},
60             {num => 10**15, sg => 'biliard', pl => 'biliarde'},
61             {num => 10**18, sg => 'trilion', pl => 'trilioane'},
62             {num => 10**21, sg => 'triliard', pl => 'triliarde'},
63             {num => 10**24, sg => 'cvadrilion', pl => 'cvadrilioane'},
64             {num => 10**27, sg => 'cvadriliard', pl => 'cvadriliarde'},
65             {num => 'inf', sg => 'inifinit', pl => 'infinit'},
66             );
67              
68             # This hash is a reversed version of the above array and it's used to convert text into numbers
69             our %BIGWORDS = (map { $_->{sg} => $_->{num}, $_->{pl} => $_->{num} } @BIGNUMS);
70              
71             # Change 'suta' to 'sută'
72             $BIGNUMS[0]{'sg'} = 'sută';
73              
74             =head1 SYNOPSIS
75              
76             use Lingua::RO::Numbers qw(number_to_ro ro_to_number);
77             print number_to_ro(315);
78             # prints: 'trei sute cincisprezece'
79              
80             print ro_to_number('trei sute douazeci si cinci virgula doi');
81             # prints: 325.2
82              
83             =head1 DESCRIPTION
84              
85             Lingua::RO::Numbers converts arbitrary numbers into human-readable
86             Romanian text and viceversa, converting arbitrary Romanian text
87             into its corresponding numerical value.
88              
89             =head2 EXPORT
90              
91             Nothing is exported by default.
92             Only the functions B and B are exportable.
93              
94             =over
95              
96             =item B
97              
98             Initialize an object.
99              
100             my $obj = Lingua::RO::Numbers->new();
101              
102             is equivalent with:
103              
104             my $obj = Lingua::RO::Numbers->new(
105             diacritics => 1,
106             invalid_number => undef,
107             negative_sign => 'minus',
108             decimal_point => 'virgulă',
109             thousands_separator => '',
110             infinity => 'infinit',
111             not_a_number => 'NaN',
112             );
113              
114             =item B
115              
116             Converts a number to its Romanian string representation.
117              
118             # Functional oriented usage
119             $string = number_to_ro($number);
120             $string = number_to_ro($number, %opts);
121              
122             # Object oriented usage
123             my $obj = Lingua::RO::Numbers->new(%opts);
124             $string = $obj->number_to_ro($number);
125              
126             # Example:
127             print number_to_ro(98_765, thousands_separator => q{,});
128             # says: 'nouăzeci și opt de mii, șapte sute șaizeci și cinci'
129              
130             =item B
131              
132             Converts a Romanian text into its numeric value.
133              
134             # Functional oriented usage
135             $number = ro_to_number($text);
136             $number = ro_to_number($text, %opts);
137              
138             # Object oriented usage
139             my $obj = Lingua::RO::Numbers->new(%opts);
140             $number = $obj->ro_to_number($text);
141              
142             # Example:
143             print ro_to_number('patruzeci si doi'); # says: 42
144              
145             =back
146              
147             =cut
148              
149             sub new {
150 4     4 1 6384 my ($class, %opts) = @_;
151              
152 4         25 my $self = bless {
153             diacritics => 1,
154             invalid_number => undef,
155             negative_sign => 'minus',
156             decimal_point => 'virgulă',
157             thousands_separator => '',
158             infinity => 'infinit',
159             not_a_number => 'NaN',
160             }, $class;
161              
162 4         6 foreach my $key (keys %{$self}) {
  4         25  
163 28 100       55 if (exists $opts{$key}) {
164 4         11 $self->{$key} = delete $opts{$key};
165             }
166             }
167              
168 4         12 foreach my $invalid_key (keys %opts) {
169 0         0 warn "Invalid option: <$invalid_key>";
170             }
171              
172 4         10 return $self;
173             }
174              
175             # This function it's an interface to a private function
176             # which converts a mathematical number into its Romanian equivalent text.
177             sub number_to_ro {
178 34     34 1 8754 my ($self, $number, %opts);
179              
180 34 100       112 if (ref $_[0] eq __PACKAGE__) {
181 33         63 ($self, $number) = @_;
182             }
183             else {
184 1         2 ($number, %opts) = @_;
185 1         3 $self = __PACKAGE__->new(%opts);
186             }
187              
188 34         106 my $word_number = $self->_number_to_ro($number + 0);
189              
190 34 50       97 if (not $self->{diacritics}) {
191 0         0 $word_number = _remove_diacritics($word_number);
192             }
193              
194             # Return the text-number
195 34         174 $word_number;
196             }
197              
198             # This function it's an interface to a private function
199             # which converts a Romanian text-number into its mathematical value.
200             sub ro_to_number {
201 33     33 1 569 my ($self, $text, %opts);
202              
203 33 100       102 if (ref $_[0] eq __PACKAGE__) {
204 32         77 ($self, $text) = @_;
205             }
206             else {
207 1         4 ($text, %opts) = @_;
208 1         6 $self = __PACKAGE__->new(%opts);
209             }
210              
211             ( # Decode the text unless it is already UTF-8
212             $] >= 5.0080001 ? utf8::is_utf8($text) : do {
213 0         0 require Encode;
214 0         0 Encode::is_utf8($text);
215             }
216             )
217 33 50       152 || do {
    100          
218 3         911 require Encode;
219 3         21093 $text = Encode::decode_utf8($text);
220             };
221              
222             # Return the number
223 33         208 $self->_ro_to_number($text);
224             }
225              
226             # This function removes the Romanian diacritics from a given text.
227             sub _remove_diacritics {
228 161     161   473103 my ($text) = @_;
229 3     3   20 $text =~ tr{ăâșțî}{aasti};
  3         11  
  3         37  
  161         414  
230 161         295 $text;
231             }
232              
233             # This functions removes irrelevant characters from a text
234             sub _normalize_text {
235              
236             # Lowercase and remove the diacritics
237 101     101   387 my $text = _remove_diacritics(lc(shift));
238              
239             # Replace irrelevant characters with a space
240 101         203 $text =~ tr/a-z / /c;
241              
242             # Return the normalized text
243 101         335 $text;
244             }
245              
246             # This function adds together a list of numbers
247             sub _add_numbers {
248 38     38   76 my (@nums) = @_;
249              
250 38         45 my $num = 0;
251 38         96 while (defined(my $i = shift @nums)) {
252              
253             # When the current number is lower than the next number
254 82 100 100     264 if (@nums and $i < $nums[0]) {
255 22         29 my $n = shift @nums;
256              
257             # This is a special case, where: int(log(1000)/log(10)) == 2
258 22         68 my $l = log($n) / log(10);
259 22 100       183 if (length($l) == length(int($l))) {
260 1         8 $l = sprintf('%.0f', $l);
261             }
262              
263             # Factor (e.g.: 400 -> 4)
264 22         221 my $f = int($i / (10**int(log($i) / log(10))));
265              
266             # When the next number is not really next to the current number
267             # e.g.: $i == 400 and $n == 5000 # should produce 405_000 not 45_000
268 22 50       65 if ((my $mod = length($n) % 3) != 0) {
269 22         37 $f *= 10**(3 - $mod);
270             }
271              
272             # Join the numbers and continue
273 22         36 $num += 10**int($l) * $f + $n;
274 22         67 next;
275             }
276              
277 60         146 $num += $i;
278             }
279              
280 38         80 $num;
281             }
282              
283             # This function converts a Romanian
284             # text-number into a mathematical number.
285             sub _ro_to_number {
286 33     33   48 my ($self, $text) = @_;
287              
288             # When no text has been provided
289 33 50       89 if (not defined $text) {
290 0         0 return;
291             }
292              
293             # If a thousand separator is defined, remove it from text
294 33 50 33     206 if (defined($self->{thousands_separator}) and length($self->{thousands_separator})) {
295 0         0 $text =~ s/\Q$self->{thousands_separator}\E/ /g;
296             }
297              
298             # Split the text into words
299 33         71 my @words = split(' ', _normalize_text($text));
300              
301 33         101 my $dec_point = _normalize_text($self->{decimal_point});
302 33         72 my $neg_sign = _normalize_text($self->{negative_sign});
303              
304 33         35 my @nums; # numbers
305             my @decs; # decimal numbers
306              
307 33         32 my $neg = 0; # bool -- true when the number is negative
308 33         39 my $adec = 0; # bool -- true after the decimal point
309              
310 33         39 my $amount = 0; # int -- current number
311 33         34 my $factor = 1; # int -- multiplication factor
312              
313 33 50       90 if (@words) {
314              
315             # Check for negative numbers
316 33 100       82 if ($words[0] eq $neg_sign) {
317 2         5 $neg = 1;
318 2         3 shift @words;
319             }
320              
321             # Check for infinity and NaN
322 33 100       106 if (@words == 1) {
323              
324             # Infinity
325 1         5 my $inf = _normalize_text($self->{infinity});
326 1 50       6 if ($words[0] eq $inf) {
327 0 0       0 return $neg ? -9**9**9 : 9**9**9;
328             }
329              
330             # Not a number
331 1         5 my $nan = _normalize_text($self->{not_a_number});
332 1 50       5 if ($words[0] eq $nan) {
333 0         0 return -sin(9**9**9);
334             }
335             }
336             }
337              
338             # Iterate over the @words
339 33   0     98 while (
      66        
340             @words and (
341              
342             # It's a small number (lower than 100)
343             do {
344             $factor = exists($WORDS{$words[0]}) ? 1 : $words[0] =~ s/zeci\z// ? 10 : 0;
345             $factor && do { $amount = shift @words };
346             $factor;
347             }
348              
349             # It's a big number (e.g.: milion)
350             or @words && exists($BIGWORDS{$words[0]}) && do {
351             $factor = $BIGWORDS{shift @words};
352             }
353              
354             # Ignore invalid words
355             or do {
356             shift @words;
357             next;
358             }
359             )
360             ) {
361              
362             # Take and multiply the current number
363 106 50       301 my $num =
364             exists($WORDS{$amount})
365             ? $WORDS{$amount} * $factor
366             : next; # skip invalid words
367              
368             # Check for some word-joining tokens
369 106 100       182 if (@words) {
370 101 100       191 if ($words[0] eq 'si') { # e.g.: patruzeci si doi
371 32         35 shift @words;
372 32         77 $num += $WORDS{shift @words};
373             }
374              
375 101 100       178 if (@words) {
376             {
377 85 100       212 if ($words[0] eq 'de') { # e.g.: o suta de mii
  87         144  
378 21         25 shift @words;
379             }
380              
381 87 100       210 if (exists $BIGWORDS{$words[0]}) {
382 80         117 $num *= $BIGWORDS{shift @words};
383             }
384              
385 87 100 100     615 if (@words && $words[0] eq 'de') {
386 2         4 redo;
387             }
388             }
389             }
390             }
391              
392             # If we are after the decimal point, store the
393             # numbers in @decs, otherwise store them in @nums.
394 106 100       204 $adec ? push(@decs, $num) : push(@nums, $num);
395              
396             # Check for the decimal point
397 106 100 100     1330 if (@words and $words[0] eq $dec_point) {
398 5         8 $adec = 1;
399 5         17 shift @words;
400             }
401             }
402              
403             # Return undef when no number has been converted
404 33 50       72 @nums || return;
405              
406             # Add all the numbers together (if any)
407 33         80 my $num = _add_numbers(@nums);
408              
409             # If the number contains decimals,
410             # add them at the end of the number
411 33 100       69 if (@decs) {
412              
413             # Special case -- check for leading zeros
414 5         10 my $zeros = '';
415 5   66     27 while (@decs and $decs[0] == 0) {
416 2         12 $zeros .= shift(@decs);
417             }
418              
419 5         16 $num .= '.' . $zeros . _add_numbers(@decs);
420             }
421              
422             # Return the number
423 33 100       563 $neg ? -$num : $num + 0;
424             }
425              
426             # This function converts numbers
427             # into their Romanian equivalent text.
428             sub _number_to_ro {
429 150     150   193 my ($self, $number) = @_;
430              
431 150         133 my @words;
432 150 100 33     1160 if (exists $DIGITS{$number}) { # example: 8
    50          
    50          
    100          
    100          
    100          
    50          
433 15         31 push @words, $DIGITS{$number};
434             }
435             elsif (lc($number) eq 'nan') { # not a number (NaN)
436 0         0 return $self->{not_a_number};
437             }
438             elsif ($number == 9**9**9) { # number is infinit
439 0         0 return $self->{infinity};
440             }
441             elsif ($number < 0) { # example: -43
442 2         8 push @words, $self->{negative_sign};
443 2         8 push @words, $self->_number_to_ro(abs($number));
444             }
445             elsif ($number != int($number)) { # example: 0.123 or 12.43
446 5         18 my $l = length($number) - 2;
447              
448 5 50       22 if ((length($number) - length(int $number) - 1) < 1) { # special case
449 0         0 push @words, $self->_number_to_ro(sprintf('%.0f', $number));
450             }
451             else {
452 5         10 push @words, $self->_number_to_ro(int $number);
453 5         11 push @words, $self->{decimal_point};
454              
455 5         7 $number -= int $number;
456              
457 5         14 until ($number == int($number)) {
458 16         13 $number *= 10;
459 16         62 $number = sprintf('%.*f', --$l, $number); # because of imprecise multiplication
460 16 100       58 push @words, $DIGITS{0} if $number < 1;
461             }
462 5         13 push @words, $self->_number_to_ro(int $number);
463             }
464             }
465             elsif ($number >= $BIGNUMS[0]{num}) { # i.e.: >= 100
466 87         176 foreach my $i (0 .. $#BIGNUMS - 1) {
467 800         861 my $j = $#BIGNUMS - $i;
468              
469 800 100 66     2111 if ($number >= $BIGNUMS[$j - 1]{num} && $number <= $BIGNUMS[$j]{num}) {
470 87         316 my $cat = int $number / $BIGNUMS[$j - 1]{num};
471 87         164 $number -= $BIGNUMS[$j - 1]{num} * int($number / $BIGNUMS[$j - 1]{num});
472              
473 87 100       249 my @of = $cat <= 2 ? () : do {
474 61 100       228 my @w = exists $DIGITS{$cat} ? $DIGITS{$cat} : ($self->_number_to_ro($cat), 'de');
475 61 100       132 if (@w > 2) {
476 31 100       83 $w[-2] = 'două' if $w[-2] eq $DIGITS{2};
477             }
478 61         145 @w;
479             };
480              
481 87 100 66     238 if ($cat >= 100 && $cat < 1_000) {
482 27         55 my $rest = $cat - 100 * int($cat / 100);
483 27 100 66     163 if (@of and $rest != 0 and exists $DIGITS{$rest}) {
      100        
484 6         16 splice @of, -1; # remove 'de'
485             }
486             }
487              
488 87 100       320 push @words,
    100          
    100          
489             $cat == 1 ? ($BIGNUMS[$j - 1]{fem} ? 'o' : 'un', $BIGNUMS[$j - 1]{sg})
490             : $cat == 2 ? ('două', $BIGNUMS[$j - 1]{pl})
491             : (@of, $BIGNUMS[$j - 1]{pl});
492              
493 87 100       153 if ($number > 0) {
494 73 100       185 $words[-1] .= $self->{thousands_separator} if $BIGNUMS[$j]{num} > 1_000;
495 73         181 push @words, $self->_number_to_ro($number);
496             }
497              
498 87         161 last;
499             }
500             }
501             }
502             elsif ($number > 19 && $number < 100) { # example: 42
503 41         60 my $cat = int $number / 10;
504 41 100       311 push @words, ($cat == 2 ? 'două' : $cat == 6 ? 'șai' : $DIGITS{$cat}) . 'zeci',
    100          
    100          
505             ($number % 10 != 0 ? ('și', $DIGITS{$number % 10}) : ());
506             }
507             else { # doesn't look like a number
508 0         0 return $self->{invalid_number};
509             }
510              
511 150 50       608 return wantarray ? @words : @words ? join(' ', @words) : ();
    100          
512             }
513              
514             =head1 AUTHOR
515              
516             Daniel "Trizen" Șuteu, C<< >>
517              
518             =head1 BUGS
519              
520             Please report any bugs or feature requests to C, or through
521             the web interface at L. I will be notified, and then you'll
522             automatically be notified of progress on your bug as I make changes.
523              
524              
525             =head1 SUPPORT
526              
527             You can find documentation for this module with the perldoc command.
528              
529             perldoc Lingua::RO::Numbers
530              
531              
532             You can also look for information at:
533              
534             =over 4
535              
536             =item * RT: CPAN's request tracker (report bugs here)
537              
538             L
539              
540             =item * AnnoCPAN: Annotated CPAN documentation
541              
542             L
543              
544             =item * CPAN Ratings
545              
546             L
547              
548             =item * Search CPAN
549              
550             L
551              
552             =back
553              
554              
555             =head1 ACKNOWLEDGEMENTS
556              
557             http://ro.wikipedia.org/wiki/Sistem_zecimal#Denumiri_ale_numerelor
558              
559              
560             =head1 LICENSE AND COPYRIGHT
561              
562             Copyright 2013-2014 Daniel "Trizen" Șuteu.
563              
564             This program is free software; you can redistribute it and/or modify it
565             under the terms of the the Artistic License (2.0). You may obtain a
566             copy of the full license at:
567              
568             L
569              
570             Any use, modification, and distribution of the Standard or Modified
571             Versions is governed by this Artistic License. By using, modifying or
572             distributing the Package, you accept this license. Do not use, modify,
573             or distribute the Package, if you do not accept this license.
574              
575             If your Modified Version has been derived from a Modified Version made
576             by someone other than you, you are nevertheless required to ensure that
577             your Modified Version complies with the requirements of this license.
578              
579             This license does not grant you the right to use any trademark, service
580             mark, tradename, or logo of the Copyright Holder.
581              
582             This license includes the non-exclusive, worldwide, free-of-charge
583             patent license to make, have made, use, offer to sell, sell, import and
584             otherwise transfer the Package with respect to any patent claims
585             licensable by the Copyright Holder that are necessarily infringed by the
586             Package. If you institute patent litigation (including a cross-claim or
587             counterclaim) against any party alleging that the Package constitutes
588             direct or contributory patent infringement, then this Artistic License
589             to you shall terminate on the date that such litigation is filed.
590              
591             Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER
592             AND CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES.
593             THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
594             PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY
595             YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR
596             CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR
597             CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE,
598             EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
599              
600              
601             =cut
602              
603             1; # End of Lingua::RO::Numbers
604              
605             __END__