File Coverage

blib/lib/Lingua/Identify.pm
Criterion Covered Total %
statement 177 181 97.7
branch 58 74 78.3
condition 12 20 60.0
subroutine 34 34 100.0
pod 15 15 100.0
total 296 324 91.3


line stmt bran cond sub pod time code
1             package Lingua::Identify;
2              
3 7     7   168609 use 5.006;
  7         23  
  7         260  
4 7     7   38 use strict;
  7         12  
  7         210  
5 7     7   38 use warnings;
  7         16  
  7         193  
6              
7 7     7   4928 use utf8;
  7         49  
  7         39  
8 7     7   241 use base 'Exporter';
  7         14  
  7         3187  
9              
10             our %EXPORT_TAGS =
11             (
12             all => [ qw(
13             langof
14             langof_file
15             confidence
16             get_all_methods
17             activate_all_languages
18             deactivate_all_languages
19             get_all_languages
20             get_active_languages
21             get_inactive_languages
22             is_active
23             is_valid_language
24             activate_language
25             deactivate_language
26             set_active_languages
27             name_of
28             )
29             ],
30             language_identification => [ qw(
31             langof
32             langof_file
33             confidence
34             get_all_methods
35             )
36             ],
37              
38             language_manipulation => [ qw(
39             activate_all_languages
40             deactivate_all_languages
41             get_all_languages
42             get_active_languages
43             get_inactive_languages
44             is_active
45             is_valid_language
46             activate_language
47             deactivate_language
48             set_active_languages
49             name_of
50             )
51             ],
52             );
53              
54             our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
55             our @EXPORT = qw();
56              
57             our $VERSION = '0.57_1';
58              
59              
60             # DEFAULT VALUES #
61             our %default_methods = qw/smallwords 2 prefixes3 1 suffixes3 1 ngrams3 1 letters 0.5/;
62             my $default_maxsize = 1_000_000;
63             my %default_extractfrom = qw/head 1/;
64              
65             =encoding utf8
66              
67             =head1 NAME
68              
69             Lingua::Identify - Language identification
70              
71             =head1 SYNOPSIS
72              
73             use Lingua::Identify qw(:language_identification);
74             $a = langof($textstring); # gives the most probable language
75              
76             or the complete way:
77              
78             @a = langof($textstring); # gives pairs of languages / probabilities
79             # sorted from most to least probable
80              
81             %a = langof($textstring); # gives a hash of language / probability
82              
83             or the expert way (see section OPTIONS, under HOW TO PERFORM IDENTIFICATION)
84              
85             $a = langof( { method => [qw/smallwords prefix2 suffix2/] }, $text);
86              
87             $a = langof( { 'max-size' => 3_000_000 }, $text);
88              
89             $a = langof( { 'extract_from' => ( 'head' => 1, 'tail' => 2)}, $text);
90              
91             =head1 DESCRIPTION
92              
93             B
94              
95             C identifies the language a given string or file is
96             written in.
97              
98             See section WHY LINGUA::IDENTIFY for a list of C's strong
99             points.
100              
101             See section KNOWN LANGUAGES for a list of available languages and HOW TO
102             PERFORM IDENTIFICATION to know how to really use this module.
103              
104             If you're in a hurry, jump to section EXAMPLES, way down below.
105              
106             Also, don't forget to read the following section, IMPORTANT WARNING.
107              
108             =head1 A WARNING ON THE ACCURACY OF LANGUAGE IDENTIFICATION METHODS
109              
110             Take a word that exists in two different languages, take a good look at it and
111             answer this question: "What language does this word belong to?".
112              
113             You can't give an answer like "Language X", right? You can only say it looks
114             like any of a set of languages.
115              
116             Similarly, it isn't always easy to identify the language of a text if the only
117             two active languages are very similar.
118              
119             Now that we've taken out of the way the warning that language identification
120             is not 100% accurate, please keep reading the documentation.
121              
122             =head1 WHY LINGUA::IDENTIFY
123              
124             You might be wondering why you should use Lingua::Identify instead of any other
125             tool for language identification.
126              
127             Here's a list of Lingua::Identify's strong points:
128              
129             =over 6
130              
131             =item * it's free and it's open-source;
132              
133             =item * it's portable (it's Perl, which means it will work in lots of different
134             platforms);
135              
136             =item * unicode support;
137              
138             =item * 4 different methods of language identification and growing (see
139             METHODS OF LANGUAGE IDENTIFICATION for more details on this one);
140              
141             =item * it's a module, which means you can easily write your own application
142             (be it CGI, TK, whatever) around it;
143              
144             =item * it comes with I, which means you don't actually need to
145             write your own application around it;
146              
147             =item * it's flexible (at the moment, you can actually choose the
148             methods to use and their relevance, the max size of input to analyze
149             each time and which part(s) of the input to analyze)
150              
151             =item * it supports big inputs (through the 'max-size' and
152             'extract_from' options)
153              
154             =item * it's easy to deal with languages (you can activate and
155             deactivate the ones you choose whenever you want to, which can improve
156             your times and accuracy);
157              
158             =item * it's maintained.
159              
160             =back
161              
162             =cut
163              
164             # initialization
165              
166             our (@all_languages,@active_languages,%languages,%regexen,@methods);
167             BEGIN {
168              
169 7     7   6391 use Class::Factory::Util;
  7         4797  
  7         42  
170 7     7   1992 for ( Lingua::Identify->subclasses() ) {
171 196 100       17969 /^[A-Z]{2}(?:_[A-Z]{2})?$/ || next;
172 189         13787 eval "require Lingua::Identify::$_ ;";
173 189 50       1678 if ($languages{_versions}{lc $_} <= 0.02) {
174 0         0 for my $k (keys %languages) {
175 0 0       0 delete($languages{$k}{lc $_}) if exists $languages{$k}{lc $_};
176             }
177             }
178             }
179              
180 7         94 @all_languages = @active_languages = keys %{$languages{_names}};
  7         267  
181              
182 7         14057 @methods = qw/smallwords/;
183              
184             }
185              
186             =head1 HOW TO PERFORM IDENTIFICATION
187              
188             =head2 langof
189              
190             To identify the language a given text is written in, use the I function.
191             To get a single value, do:
192              
193             $language = langof($text);
194              
195             To get the most probable language and also the percentage of its probability,
196             do:
197              
198             ($language, $probability) = langof($text);
199              
200             If you want a hash where each active language is mapped into its percentage,
201             use this:
202              
203             %languages = langof($text);
204              
205             =cut
206              
207             sub langof {
208 72     72 1 17213 my %config = ();
209 72 100       338 %config = (%config, %{+shift}) if ref($_[0]) eq 'HASH';
  42         133  
210              
211             =head3 OPTIONS
212              
213             I can also be given some configuration parameters, in this way:
214              
215             $language = langof(\%config, $text);
216              
217             These parameters are detailed here:
218              
219             =over 6
220              
221             =item * B
222              
223             When the size of the input exceeds the C'max-size', C analyzes
224             only the beginning of the file. You can specify which part of the file
225             is analyzed with the 'extract-from' option:
226              
227             langof( { 'extract_from' => 'tail' } , $text );
228              
229             Possible values are 'head' and 'tail' (for now).
230              
231             You can also specify more than one part of the file, so that text is
232             extracted from those parts:
233              
234             langof( { 'extract_from' => [ 'head', 'tail' ] } , $text );
235              
236             (this will be useful when more than two possibilities exist)
237              
238             You can also specify different values for each part of the file (not
239             necessarily for all of them:
240              
241             langof( { 'extract_from' => { head => 40, tail => 60 } } , $text);
242              
243             The line above, for instance, retrives 40% of the text from the
244             beginning and 60% from the end. Note, however, that those values are
245             not percentages. You'd get the same behavior with:
246              
247             langof( { 'extract_from' => { head => 80, tail => 120 } } , $text);
248              
249             The percentages would be the same.
250              
251             =item * B
252              
253             By default, C analyzes only 1,000,000 bytes. You can specify
254             how many bytes (at the most) can be analyzed (if not enough exist, the
255             whole input is still analyzed).
256              
257             langof( { 'max-size' => 2000 }, $text);
258              
259             If you want all the text to be analyzed, set max-size to 0:
260              
261             langof( { 'max-size' => 0 }, $text);
262              
263             See also C.
264              
265             =item * B
266              
267             You can choose which method or methods to use, and also the relevance of each of
268             them.
269              
270             To choose a single method to use:
271              
272             langof( {method => 'smallwords' }, $text);
273              
274             To choose several methods:
275              
276             langof( {method => [qw/prefixes2 suffixes2/]}, $text);
277              
278             To choose several methods and give them different weight:
279              
280             langof( {method => {smallwords => 0.5, ngrams3 => 1.5} }, $text);
281              
282             To see the list of available methods, see section METHODS OF LANGUAGE
283             IDENTIFICATION.
284              
285             If no method is specified, the configuration for this parameter is the
286             following (this might change in the future):
287              
288             method => {
289             smallwords => 0.5,
290             prefixes2 => 1,
291             suffixes3 => 1,
292             ngrams3 => 1.3
293             };
294              
295             =item * B
296              
297             By default, C assumes C mode, but others are
298             available.
299              
300             In C mode, instead of actually calculating anything,
301             C only does the preparation it has to and then
302             returns a bunch of information, including the list of the active
303             languages, the selected methods, etc. It also returns the text meant
304             to be analised.
305              
306             Do be warned that, with I, the dummy mode still reads the
307             files, it simply doesn't calculate language.
308              
309             langof( { 'mode' => 'dummy' }, $text);
310              
311             This returns something like this:
312              
313             { 'methods' => { 'smallwords' => '0.5',
314             'prefixes2' => '1',
315             },
316             'config' => { 'mode' => 'dummy' },
317             'max-size' => 1000000,
318             'active-languages' => [ 'es', 'pt' ],
319             'text' => $text,
320             'mode' => 'dummy',
321             }
322              
323             =back
324              
325             =cut
326              
327             # select the methods
328 72 100       682 my %methods = defined $config{'method'} ? _make_hash($config{'method'})
329             : %default_methods;
330              
331             # select max-size
332 72 100       290 my $maxsize = defined $config{'max-size'} ? $config{'max-size'}
333             : $default_maxsize;
334              
335             # get the text
336 72         452 my $text = join "\n", @_;
337 72 100       208 return wantarray ? () : undef unless $text;
    100          
338              
339             # this is the support for big files; if the input is bigger than the $maxsize, we act
340 70 100 100     986 if ($maxsize < length $text && $maxsize != 0) {
341             # select extract_from
342 1 50       9 my %extractfrom = defined $config{'extract_from'} ? _make_hash($config{'extract_from'})
343             : %default_extractfrom;
344 1         4 my $total_weight = 0;
345 1         5 for (keys %extractfrom) {
346 1 50 33     8 if ($_ eq 'head' or $_ eq 'tail') {
347 1         5 $total_weight += $extractfrom{$_};
348 1         3 next;
349             }
350             else {
351 0         0 delete $extractfrom{$_};
352             }
353             }
354 1         11 for (keys %extractfrom) {
355 1         7 $extractfrom{$_} = $extractfrom{$_} / $total_weight;
356             }
357              
358 1   50     6 $extractfrom{'head'} ||= 0;
359 1   50     8 $extractfrom{'tail'} ||= 0;
360              
361 1         5 my $head = int $maxsize * $extractfrom{'head'};
362 1         4 my $tail = length($text) - $head - int $maxsize * $extractfrom{'tail'};
363 1         7 substr( $text, $head, $tail, '');
364             }
365              
366             # dummy mode exits here
367 70   100     430 $config{'mode'} ||= 'normal';
368 70 100       219 if ($config{'mode'} eq 'dummy') {
369             return {
370 7         25 'method' => \%methods,
371             'max-size' => $maxsize,
372             'config' => \%config,
373             'active-languages' => [ sort (get_active_languages()) ],
374             'text' => $text,
375             'mode' => $config{'mode'},
376             };
377             }
378              
379             # use the methods
380 63         75 my (%result, $total);
381 63         188 for (keys %methods) {
382 295         449 my %temp_result;
383              
384 295 100       2891 if (/^letters$/) {
    100          
    100          
    100          
    50          
385 58         213 %temp_result = _langof_by_used_letters('letters', $text);
386             }
387             elsif (/^smallwords$/) {
388 60         294 %temp_result = _langof_by_word_method('smallwords', $text);
389             }
390             elsif (/^(prefixes[1-4])$/) {
391 58         249 %temp_result = _langof_by_prefix_method($1, $text);
392             }
393             elsif (/^(suffixes[1-4])$/) {
394 61         277 %temp_result = _langof_by_suffix_method($1, $text);
395             }
396             elsif (/^(ngrams[1-4])$/) {
397 58         250 %temp_result = _langof_by_ngram_method($1, $text);
398             }
399              
400 295         3729 for my $l (keys %temp_result) {
401 4733         7726 my $temp = $temp_result{$l} * $methods{$_};
402 4733         5684 $result{$l} += $temp;
403 4733         7054 $total += $temp;
404             }
405             }
406              
407             # report the results
408 1587 50       3516 my @result = (
409 5600         6829 map { ( $_, ($total ? $result{$_} / $total : 0)) }
410 63         434 sort { $result{$b} <=> $result{$a} } keys %result
411             );
412              
413 63 100       1705 return wantarray ? @result : $result[0];
414             }
415              
416             sub _make_hash {
417 11     11   17 my %hash;
418 11         20 my $temp = shift;
419 11         29 for (ref($temp)) {
420 11 100       51 if (/^HASH$/) {
    100          
421 1         2 %hash = %{$temp};
  1         7  
422             }
423             elsif (/^ARRAY$/) {
424 1         2 for (@{$temp}) {
  1         3  
425 2         7 $hash{$_}++;
426             }
427             }
428             else {
429 9         43 $hash{$temp} = 1;
430             }
431             }
432 11         51 %hash;
433             }
434              
435             =head2 langof_file
436              
437             I works just like I, with the exception that it
438             reveives filenames instead of text. It reads these texts (if existing
439             and readable, of course) and parses its content.
440              
441             Currently, I assumes the files are regular text. This may
442             change in the future and the files might be scanned to check their
443             filetype and then parsed to extract only their textual content (which
444             should be pretty useful so that you can perform language
445             identification, say, in HTML files, or PDFs).
446              
447             To identify the language a file is written in:
448              
449             $language = langof_file($path);
450              
451             To get the most probable language and also the percentage of its probability,
452             do:
453              
454             ($language, $probability) = langof_file($path);
455              
456             If you want a hash where each active language is mapped into its percentage,
457             use this:
458              
459             %languages = langof_file($path);
460              
461             If you pass more than one file to I, they will all be
462             read and their content merged and then parsed for language
463             identification.
464              
465             =cut
466              
467             sub langof_file {
468 31     31 1 21768 my %config = ();
469 31 100       156 if (ref($_[0]) eq 'HASH') {%config = (%config, %{+shift})}
  2         5  
  2         11  
470              
471             =head3 OPTIONS
472              
473             I accepts all the options I does, so refer to
474             those first (up in this document).
475              
476             $language = langof_file(\%config, $path);
477              
478             I currently only reads the first 10,000 bytes of each
479             file.
480              
481             You can force an input encoding with C<< { encoding => 'ISO-8859-1' } >>
482             in the configuration hash.
483              
484             =cut
485              
486             # select max-size
487 31 50       142 my $maxsize = defined $config{'max-size'} ? $config{'max-size'}
488             : $default_maxsize;
489              
490 31         97 my @files = @_;
491 31         74 my $text = '';
492              
493 31         72 for my $file (@files) {
494             #-r and -e or next;
495 32 100       114 if (exists($config{encoding})) {
496 1 50   1   45 open(FILE, "<:encoding($config{encoding})", $file) or next;
  1         9  
  1         3  
  1         8  
497             } else {
498 31 50       1739 open(FILE, "<:utf8", $file) or next;
499             }
500 32         13707 local $/ = \$maxsize;
501 32         1980 $text .= ;
502 32         812 close(FILE);
503             }
504              
505 31         134 return langof(\%config,$text);
506             }
507              
508             =head2 confidence
509              
510             After getting the results into an array, its first element is the most probable
511             language. That doesn't mean it is very probable or not.
512              
513             You can find more about the likeliness of the results to be accurate by
514             computing its confidence level.
515              
516             use Lingua::Identify qw/:language_identification/;
517             my @results = langof($text);
518             my $confidence_level = confidence(@results);
519             # $confidence_level now holds a value between 0.5 and 1; the higher that
520             # value, the more accurate the results seem to be
521              
522             The formula used is pretty simple: p1 / (p1 + p2) , where p1 is the
523             probability of the most likely language and p2 is the probability of
524             the language which came in second. A couple of examples to illustrate
525             this:
526              
527             English 50% Portuguese 10% ...
528              
529             confidence level: 50 / (50 + 10) = 0.83
530              
531             Another example:
532              
533             Spanish 30% Portuguese 10% ...
534              
535             confidence level: 30 / (25 + 30) = 0.55
536              
537             French 10% German 5% ...
538              
539             confidence level: 10 / (10 + 5) = 0.67
540              
541             As you can see, the first example is probably the most accurate one.
542             Are there any doubts? The English language has five times the
543             probability of the second language.
544              
545             The second example is a bit more tricky. 55% confidence. The
546             confidence level is always above 50%, for obvious reasons. 55% doesn't
547             make anyone confident in the results, and one shouldn't be, with
548             results such as these.
549              
550             Notice the third example. The confidence level goes up to 67%, but the
551             probability of French is of mere 10%. So what? It's twice as much as
552             the second language. The low probability may well be caused by a great
553             number of languages in play.
554              
555             =cut
556              
557             sub confidence {
558 62 100 66 62 1 90196 defined $_[1] and $_[1] or return 0;
559 59 100 33     427 defined $_[3] and $_[3] or return 1;
560 58         359 $_[1] / ($_[1] + $_[3]);
561             }
562              
563             =head2 get_all_methods
564              
565             Returns a list comprised of all the available methods for language
566             identification.
567              
568             =cut
569              
570             sub get_all_methods {
571 1     1 1 28 qw/smallwords
572             prefixes1 prefixes2 prefixes3 prefixes4
573             suffixes1 suffixes2 suffixes3 suffixes4
574             ngrams1 ngrams2 ngrams3 ngrams4/
575             }
576              
577             =head1 LANGUAGE IDENTIFICATION IN GENERAL
578              
579             Language identification is based in patterns.
580              
581             In order to identify the language a given text is written in, we repeat a given
582             process for each active language (see section LANGUAGES MANIPULATION); in that
583             process, we look for common patterns of that language. Those patterns can be
584             prefixes, suffixes, common words, ngrams or even sequences of words.
585              
586             After repeating the process for each language, the total score for each of them
587             is then used to compute the probability (in percentage) for each language to be
588             the one of that text.
589              
590             =cut
591              
592             sub _langof_by_method {
593 237     237   900 my ($method, $elements, $text) = @_;
594 237         341 my (%result, $total);
595              
596 237         703 for my $language (get_active_languages()) {
597 6346         6932 for (keys %{$languages{$method}{$language}}) {
  6346         52534  
598 173900 100       321310 if (exists $$elements{$_}) {
599 19274         50134 $result{$language} +=
600             $$elements{$_} * ${languages{$method}{$language}{$_}};
601 19274         43806 $total +=
602             $$elements{$_} * ${languages{$method}{$language}{$_}};
603             }
604             }
605             }
606              
607 3167 50       7917 my @result = (
608 9268         11927 map { ( $_, ($total ? $result{$_} / $total : 0)) }
609 237         2047 sort { $result{$b} <=> $result{$a} } keys %result
610             );
611              
612 237 50       11136 return wantarray ? @result : $result[0];
613             }
614              
615             =head1 METHODS OF LANGUAGE IDENTIFICATION
616              
617             C currently comprises four different ways for language
618             identification, in a total of thirteen variations of those.
619              
620             The available methods are the following: B, B,
621             B, B, B, B, B,
622             B, B, B, B, B and B.
623              
624             Here's a more detailed explanation of each of those ways and those methods
625              
626             =head2 Small Word Technique - B
627              
628             The "Small Word Technique" searches the text for the most common words of each
629             active language. These words are usually articles, pronouns, etc, which happen
630             to be (usually) the shortest words of the language; hence, the method name.
631              
632             This is usually a good method for big texts, especially if you happen to have
633             few languages active.
634              
635             =cut
636              
637             sub _langof_by_word_method {
638 60     60   291 my ($method, $text) = @_;
639              
640             sub _words_count {
641 60     60   152 my ($words, $text) = @_;
642 60         20186 for my $word (split /[\s\n]+/, $text) {
643 17632         33905 $words->{$word}++
644             }
645             }
646              
647 60         154 my %words;
648 60         262 _words_count(\%words, $text);
649 60         1593 return _langof_by_method($method, \%words, $text);
650             }
651              
652             =head2 Used Letters - B
653              
654             In this technique used letters are compared with the letters from each
655             language. This method is not meant to be used alone, but to help
656             other methods to differentiate between dialects.
657              
658             =cut
659              
660             sub _langof_by_used_letters {
661 58     58   259 my ($method, $text) = @_;
662              
663 58         515 my $letters = ngram_counts( { spaces => 0 }, $text, 1);
664              
665 58         104078 my ($total, %result);
666 58         207 for my $language (get_active_languages()) {
667 1566         2777 $result{$language} = 1;
668 1566         12228 for my $l (keys %$letters) {
669 44469 100       120159 if (not exists($languages{letters}{$language}{$l})) {
670 10144         21640 $result{$language} -= .1;
671             }
672             }
673 1566 100       7144 $result{$language} = 0 if $result{$language} < 0;
674 1566         2517 $total += $result{$language};
675             }
676 1566 50       3782 my @result = (
677 5069         6143 map { ( $_, ($total ? $result{$_} / $total : 0)) }
678 58         609 sort { $result{$b} <=> $result{$a} } keys %result
679             );
680              
681 58 50       1678 return wantarray ? @result : $result[0];
682             }
683              
684              
685             =head2 Prefix Analysis - B, B, B, B
686              
687             This method analyses text for the common prefixes of each active language.
688              
689             The methods are, respectively, for prefixes of size 1, 2, 3 and 4.
690              
691             =cut
692              
693             sub _langof_by_prefix_method {
694 7     7   7103 use Text::Affixes;
  7         26870  
  7         1089  
695              
696 58     58   389 (my $method = shift) =~ /^prefixes(\d)$/;
697 58         223 my $text = shift;
698              
699 58         607 my $prefixes = get_prefixes( {min => $1, max => $1}, $text);
700              
701 58         78680 return _langof_by_method($method, $$prefixes{$1}, $text);
702             }
703              
704             =head2 Suffix Analysis - B, B, B, B
705              
706             Similar to the Prefix Analysis (see above), but instead analysing common
707             suffixes.
708              
709             The methods are, respectively, for suffixes of size 1, 2, 3 and 4.
710              
711             =cut
712              
713             sub _langof_by_suffix_method {
714 7     7   47 use Text::Affixes;
  7         14  
  7         886  
715              
716 61     61   389 (my $method = shift) =~ /^suffixes(\d)$/;
717 61         293 my $text = shift;
718              
719 61         610 my $suffixes = get_suffixes({min => $1, max => $1}, $text);
720              
721 61         78743 return _langof_by_method($method, $$suffixes{$1}, $text);
722             }
723              
724             ###
725              
726             # Have you seen my brother? He's a two line long comment. I think he
727             # might be lost... :-\ Me and my father have been looking for him for
728             # some time now :-/
729              
730             ###
731              
732             =head2 Ngram Categorization - B, B, B, B
733              
734             Ngrams are sequences of tokens. You can think of them as syllables, but they
735             are also more than that, as they are not only comprised by characters, but also
736             by spaces (delimiting or separating words).
737              
738             Ngrams are a very good way for identifying languages, given that the most
739             common ones of each language are not generally very common in others.
740              
741             This is usually the best method for small amounts of text or too many active
742             languages.
743              
744             The methods are, respectively, for ngrams of size 1, 2, 3 and 4.
745              
746             =cut
747              
748             sub _langof_by_ngram_method {
749 7     7   5687 use Text::Ngram qw(ngram_counts);
  7         22564  
  7         11309  
750              
751 58     58   339 (my $method = shift) =~ /^ngrams([1-4])$/;
752 58         182 my $text = shift;
753              
754 58         445 my $ngrams = ngram_counts( {spaces => 0}, $text, $1);
755              
756 58 50       219808 $ngrams = 'letters' if $ngrams eq "ngrams1";
757 58         246 return _langof_by_method($method, $ngrams, $text);
758             }
759              
760             =head1 LANGUAGE MANIPULATION
761              
762             When trying to perform language identification, C works not with
763             all available languages, but instead with the ones that are active.
764              
765             By default, all available languages are active, but that can be changed by the
766             user.
767              
768             For your convenience, several methods regarding language manipulation were
769             created. In order to use them, load the module with the tag
770             :language_manipulation.
771              
772             These methods work with the two letters code for languages.
773              
774             =over 6
775              
776             =item B
777              
778             Activate a language
779              
780             activate_language('en');
781              
782             # or
783              
784             activate_language($_) for get_all_languages();
785              
786             =cut
787              
788             sub activate_language {
789 1 50   1 1 11 unless (grep { $_ eq $_[0] } @active_languages) {
  0         0  
790 1         4 push @active_languages, $_[0];
791             }
792 1         5 return @active_languages;
793             }
794              
795             =item B
796              
797             Activates all languages
798              
799             activate_all_languages();
800              
801             =cut
802              
803             sub activate_all_languages {
804 2     2 1 6 @active_languages = get_all_languages();
805 2         16 return @active_languages;
806             }
807              
808             =item B
809              
810             Deactivates a language
811              
812             deactivate_language('en');
813              
814             =cut
815              
816             sub deactivate_language {
817 1     1 1 5 @active_languages = grep { ! ($_ eq $_[0]) } @active_languages;
  27         40  
818 1         13 return @active_languages;
819             }
820              
821             =item B
822              
823             Deactivates all languages
824              
825             deactivate_all_languages();
826              
827             =cut
828              
829             sub deactivate_all_languages {
830 3     3 1 15 @active_languages = ();
831 3         13 return @active_languages;
832             }
833              
834             =item B
835              
836             Returns the names of all available languages
837              
838             my @all_languages = get_all_languages();
839              
840             =cut
841              
842             sub get_all_languages {
843 55     55 1 1067 return @all_languages;
844             }
845              
846             =item B
847              
848             Returns the names of all active languages
849              
850             my @active_languages = get_active_languages();
851              
852             =cut
853              
854             sub get_active_languages {
855 367     367 1 2873 return @active_languages;
856             }
857              
858             =item B
859              
860             Returns the names of all inactive languages
861              
862             my @active_languages = get_inactive_languages();
863              
864             =cut
865              
866             sub get_inactive_languages {
867 2     2 1 739 return grep { ! is_active($_) } get_all_languages();
  54         67  
868             }
869              
870             =item B
871              
872             Returns the name of the language if it is active, an empty list otherwise
873              
874             if (is_active('en')) {
875             # YOUR CODE HERE
876             }
877              
878             =cut
879              
880             sub is_active {
881 56     56 1 73 return grep { $_ eq $_[0] } get_active_languages();
  729         854  
882             }
883              
884             =item B
885              
886             Returns the name of the language if it exists, an empty list otherwise
887              
888             if (is_valid_language('en')) {
889             # YOUR CODE HERE
890             }
891              
892             =cut
893              
894             sub is_valid_language {
895 37     37 1 13243 return grep { $_ eq $_[0] } get_all_languages();
  999         1359  
896             }
897              
898             =item B
899              
900             Sets the active languages
901              
902             set_active_languages('en', 'pt');
903              
904             # or
905              
906             set_active_languages(get_all_languages());
907              
908             =cut
909              
910             sub set_active_languages {
911 3     3 1 11 @active_languages = grep { is_valid_language($_) } @_;
  6         18  
912 3         25 return @active_languages;
913             }
914              
915             =item B
916              
917             Given the two letter tag of a language, returns its name
918              
919             my $language_name = name_of('pt');
920              
921             =cut
922              
923             sub name_of {
924 1   50 1 1 4 my $tag = shift || return undef;
925 1         7 return $languages{_names}{$tag};
926             }
927              
928             =back
929              
930             =cut
931              
932             1;
933             __END__