File Coverage

blib/lib/Lingua/Identify.pm
Criterion Covered Total %
statement 161 165 97.5
branch 49 62 79.0
condition 13 20 65.0
subroutine 33 33 100.0
pod 15 15 100.0
total 271 295 91.8


line stmt bran cond sub pod time code
1             package Lingua::Identify;
2              
3 7     7   164532 use 5.006;
  7         28  
  7         278  
4 7     7   47 use strict;
  7         13  
  7         259  
5 7     7   35 use warnings;
  7         18  
  7         222  
6              
7 7     7   5050 use utf8;
  7         53  
  7         39  
8 7     7   241 use base 'Exporter';
  7         14  
  7         2837  
9              
10             our %EXPORT_TAGS =
11             (
12             all => [ qw(
13             langof
14             langof_file
15             confidence
16             get_all_methods
17             activate_all_languages
18             deactivate_all_languages
19             get_all_languages
20             get_active_languages
21             get_inactive_languages
22             is_active
23             is_valid_language
24             activate_language
25             deactivate_language
26             set_active_languages
27             name_of
28             )
29             ],
30             language_identification => [ qw(
31             langof
32             langof_file
33             confidence
34             get_all_methods
35             )
36             ],
37              
38             language_manipulation => [ qw(
39             activate_all_languages
40             deactivate_all_languages
41             get_all_languages
42             get_active_languages
43             get_inactive_languages
44             is_active
45             is_valid_language
46             activate_language
47             deactivate_language
48             set_active_languages
49             name_of
50             )
51             ],
52             );
53              
54             our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
55             our @EXPORT = qw();
56              
57             our $VERSION = '0.56';
58              
59              
60             # DEFAULT VALUES #
61             our %default_methods = qw/smallwords 1.3 prefixes2 1.5 suffixes3 1.5 ngrams3 1.2/;
62             my $default_maxsize = 1_000_000;
63             my %default_extractfrom = qw/head 1/;
64              
65             =head1 NAME
66              
67             Lingua::Identify - Language identification
68              
69             =head1 SYNOPSIS
70              
71             use Lingua::Identify qw(:language_identification);
72             $a = langof($textstring); # gives the most probable language
73              
74             or the complete way:
75              
76             @a = langof($textstring); # gives pairs of languages / probabilities
77             # sorted from most to least probable
78              
79             %a = langof($textstring); # gives a hash of language / probability
80              
81             or the expert way (see section OPTIONS, under HOW TO PERFORM IDENTIFICATION)
82              
83             $a = langof( { method => [qw/smallwords prefix2 suffix2/] }, $text);
84              
85             $a = langof( { 'max-size' => 3_000_000 }, $text);
86              
87             $a = langof( { 'extract_from' => ( 'head' => 1, 'tail' => 2)}, $text);
88              
89             =head1 DESCRIPTION
90              
91             B
92              
93             C identifies the language a given string or file is
94             written in.
95              
96             See section WHY LINGUA::IDENTIFY for a list of C's strong
97             points.
98              
99             See section KNOWN LANGUAGES for a list of available languages and HOW TO
100             PERFORM IDENTIFICATION to know how to really use this module.
101              
102             If you're in a hurry, jump to section EXAMPLES, way down below.
103              
104             Also, don't forget to read the following section, IMPORTANT WARNING.
105              
106             =head1 A WARNING ON THE ACCURACY OF LANGUAGE IDENTIFICATION METHODS
107              
108             Take a word that exists in two different languages, take a good look at it and
109             answer this question: "What language does this word belong to?".
110              
111             You can't give an answer like "Language X", right? You can only say it looks
112             like any of a set of languages.
113              
114             Similarly, it isn't always easy to identify the language of a text if the only
115             two active languages are very similar.
116              
117             Now that we've taken out of the way the warning that language identification
118             is not 100% accurate, please keep reading the documentation.
119              
120             =head1 WHY LINGUA::IDENTIFY
121              
122             You might be wondering why you should use Lingua::Identify instead of any other
123             tool for language identification.
124              
125             Here's a list of Lingua::Identify's strong points:
126              
127             =over 6
128              
129             =item * it's free and it's open-source;
130              
131             =item * it's portable (it's Perl, which means it will work in lots of different
132             platforms);
133              
134             =item * unicode support;
135              
136             =item * 4 different methods of language identification and growing (see
137             METHODS OF LANGUAGE IDENTIFICATION for more details on this one);
138              
139             =item * it's a module, which means you can easily write your own application
140             (be it CGI, TK, whatever) around it;
141              
142             =item * it comes with I, which means you don't actually need to
143             write your own application around it;
144              
145             =item * it's flexible (at the moment, you can actually choose the
146             methods to use and their relevance, the max size of input to analyze
147             each time and which part(s) of the input to analyze)
148              
149             =item * it supports big inputs (through the 'max-size' and
150             'extract_from' options)
151              
152             =item * it's easy to deal with languages (you can activate and
153             deactivate the ones you choose whenever you want to, which can improve
154             your times and accuracy);
155              
156             =item * it's maintained.
157              
158             =back
159              
160             =cut
161              
162             # initialization
163              
164             our (@all_languages,@active_languages,%languages,%regexen,@methods);
165             BEGIN {
166              
167 7     7   6465 use Class::Factory::Util;
  7         4777  
  7         42  
168 7     7   1629 for ( Lingua::Identify->subclasses() ) {
169 189 100       17904 /^[A-Z][A-Z]$/ || next;
170 182         13830 eval "require Lingua::Identify::$_ ;";
171 182 50       1578 if ($languages{_versions}{lc $_} < 0.02) {
172 0         0 for my $k (keys %languages) {
173 0 0       0 delete($languages{$k}{lc $_}) if exists $languages{$k}{lc $_};
174             }
175             }
176             }
177              
178 7         87 @all_languages = @active_languages = keys %{$languages{_names}};
  7         140  
179              
180 7         14750 @methods = qw/smallwords/;
181              
182             }
183              
184             =head1 HOW TO PERFORM IDENTIFICATION
185              
186             =head2 langof
187              
188             To identify the language a given text is written in, use the I function.
189             To get a single value, do:
190              
191             $language = langof($text);
192              
193             To get the most probable language and also the percentage of its probability,
194             do:
195              
196             ($language, $probability) = langof($text);
197              
198             If you want a hash where each active language is mapped into its percentage,
199             use this:
200              
201             %languages = langof($text);
202              
203             =cut
204              
205             sub langof {
206 70     70 1 13705 my %config = ();
207 70 100       314 %config = (%config, %{+shift}) if ref($_[0]) eq 'HASH';
  41         167  
208              
209             =head3 OPTIONS
210              
211             I can also be given some configuration parameters, in this way:
212              
213             $language = langof(\%config, $text);
214              
215             These parameters are detailed here:
216              
217             =over 6
218              
219             =item * B
220              
221             When the size of the input exceeds the C'max-size', C analyzes
222             only the beginning of the file. You can specify which part of the file
223             is analyzed with the 'extract-from' option:
224              
225             langof( { 'extract_from' => 'tail' } , $text );
226              
227             Possible values are 'head' and 'tail' (for now).
228              
229             You can also specify more than one part of the file, so that text is
230             extracted from those parts:
231              
232             langof( { 'extract_from' => [ 'head', 'tail' ] } , $text );
233              
234             (this will be useful when more than two possibilities exist)
235              
236             You can also specify different values for each part of the file (not
237             necessarily for all of them:
238              
239             langof( { 'extract_from' => { head => 40, tail => 60 } } , $text);
240              
241             The line above, for instance, retrives 40% of the text from the
242             beginning and 60% from the end. Note, however, that those values are
243             not percentages. You'd get the same behavior with:
244              
245             langof( { 'extract_from' => { head => 80, tail => 120 } } , $text);
246              
247             The percentages would be the same.
248              
249             =item * B
250              
251             By default, C analyzes only 1,000,000 bytes. You can specify
252             how many bytes (at the most) can be analyzed (if not enough exist, the
253             whole input is still analyzed).
254              
255             langof( { 'max-size' => 2000 }, $text);
256              
257             If you want all the text to be analyzed, set max-size to 0:
258              
259             langof( { 'max-size' => 0 }, $text);
260              
261             See also C.
262              
263             =item * B
264              
265             You can choose which method or methods to use, and also the relevance of each of
266             them.
267              
268             To choose a single method to use:
269              
270             langof( {method => 'smallwords' }, $text);
271              
272             To choose several methods:
273              
274             langof( {method => [qw/prefixes2 suffixes2/]}, $text);
275              
276             To choose several methods and give them different weight:
277              
278             langof( {method => {smallwords => 0.5, ngrams3 => 1.5} }, $text);
279              
280             To see the list of available methods, see section METHODS OF LANGUAGE
281             IDENTIFICATION.
282              
283             If no method is specified, the configuration for this parameter is the
284             following (this might change in the future):
285              
286             method => {
287             smallwords => 0.5,
288             prefixes2 => 1,
289             suffixes3 => 1,
290             ngrams3 => 1.3
291             };
292              
293             =item * B
294              
295             By default, C assumes C mode, but others are
296             available.
297              
298             In C mode, instead of actually calculating anything,
299             C only does the preparation it has to and then
300             returns a bunch of information, including the list of the active
301             languages, the selected methods, etc. It also returns the text meant
302             to be analised.
303              
304             Do be warned that, with I, the dummy mode still reads the
305             files, it simply doesn't calculate language.
306              
307             langof( { 'mode' => 'dummy' }, $text);
308              
309             This returns something like this:
310              
311             { 'methods' => { 'smallwords' => '0.5',
312             'prefixes2' => '1',
313             },
314             'config' => { 'mode' => 'dummy' },
315             'max-size' => 1000000,
316             'active-languages' => [ 'es', 'pt' ],
317             'text' => $text,
318             'mode' => 'dummy',
319             }
320              
321             =back
322              
323             =cut
324              
325             # select the methods
326 70 100       612 my %methods = defined $config{'method'} ? _make_hash($config{'method'})
327             : %default_methods;
328              
329             # select max-size
330 70 100       274 my $maxsize = defined $config{'max-size'} ? $config{'max-size'}
331             : $default_maxsize;
332              
333             # get the text
334 70         464 my $text = join "\n", @_;
335 70 100       245 return wantarray ? () : undef unless $text;
    100          
336              
337             # this is the support for big files; if the input is bigger than the $maxsize, we act
338 68 100 100     1253 if ($maxsize < length $text && $maxsize != 0) {
339             # select extract_from
340 1 50       8 my %extractfrom = defined $config{'extract_from'} ? _make_hash($config{'extract_from'})
341             : %default_extractfrom;
342 1         3 my $total_weight = 0;
343 1         3 for (keys %extractfrom) {
344 1 50 33     7 if ($_ eq 'head' or $_ eq 'tail') {
345 1         4 $total_weight += $extractfrom{$_};
346 1         3 next;
347             }
348             else {
349 0         0 delete $extractfrom{$_};
350             }
351             }
352 1         14 for (keys %extractfrom) {
353 1         6 $extractfrom{$_} = $extractfrom{$_} / $total_weight;
354             }
355              
356 1   50     5 $extractfrom{'head'} ||= 0;
357 1   50     7 $extractfrom{'tail'} ||= 0;
358              
359 1         4 my $head = int $maxsize * $extractfrom{'head'};
360 1         3 my $tail = length($text) - $head - int $maxsize * $extractfrom{'tail'};
361 1         6 substr( $text, $head, $tail, '');
362             }
363              
364             # dummy mode exits here
365 68   100     427 $config{'mode'} ||= 'normal';
366 68 100       258 if ($config{'mode'} eq 'dummy') {
367             return {
368 7         20 'method' => \%methods,
369             'max-size' => $maxsize,
370             'config' => \%config,
371             'active-languages' => [ sort (get_active_languages()) ],
372             'text' => $text,
373             'mode' => $config{'mode'},
374             };
375             }
376              
377             # use the methods
378 61         98 my (%result, $total);
379 61         176 for (keys %methods) {
380 229         334 my %temp_result;
381              
382 229 100       1661 if (/^smallwords$/) {
    100          
    100          
    50          
383 58         233 %temp_result = _langof_by_word_method('smallwords', $text);
384             }
385             elsif (/^(prefixes[1-4])$/) {
386 56         236 %temp_result = _langof_by_prefix_method($1, $text);
387             }
388             elsif (/^(suffixes[1-4])$/) {
389 59         246 %temp_result = _langof_by_suffix_method($1, $text);
390             }
391             elsif (/^(ngrams[1-4])$/) {
392 56         232 %temp_result = _langof_by_ngram_method($1, $text);
393             }
394              
395 229         3237 for my $l (keys %temp_result) {
396 3154         4667 my $temp = $temp_result{$l} * $methods{$_};
397 3154         3989 $result{$l} += $temp;
398 3154         5106 $total += $temp;
399             }
400             }
401              
402             # report the results
403 1040 50       2595 my @result = (
404 3378         4152 map { ( $_, ($total ? $result{$_} / $total : 0)) }
405 61         400 sort { $result{$b} <=> $result{$a} } keys %result
406             );
407              
408 61 100       1438 return wantarray ? @result : $result[0];
409             }
410              
411             sub _make_hash {
412 11     11   21 my %hash;
413 11         21 my $temp = shift;
414 11         27 for (ref($temp)) {
415 11 100       52 if (/^HASH$/) {
    100          
416 1         2 %hash = %{$temp};
  1         6  
417             }
418             elsif (/^ARRAY$/) {
419 1         2 for (@{$temp}) {
  1         3  
420 2         7 $hash{$_}++;
421             }
422             }
423             else {
424 9         32 $hash{$temp} = 1;
425             }
426             }
427 11         47 %hash;
428             }
429              
430             =head2 langof_file
431              
432             I works just like I, with the exception that it
433             reveives filenames instead of text. It reads these texts (if existing
434             and readable, of course) and parses its content.
435              
436             Currently, I assumes the files are regular text. This may
437             change in the future and the files might be scanned to check their
438             filetype and then parsed to extract only their textual content (which
439             should be pretty useful so that you can perform language
440             identification, say, in HTML files, or PDFs).
441              
442             To identify the language a file is written in:
443              
444             $language = langof_file($path);
445              
446             To get the most probable language and also the percentage of its probability,
447             do:
448              
449             ($language, $probability) = langof_file($path);
450              
451             If you want a hash where each active language is mapped into its percentage,
452             use this:
453              
454             %languages = langof_file($path);
455              
456             If you pass more than one file to I, they will all be
457             read and their content merged and then parsed for language
458             identification.
459              
460             =cut
461              
462             sub langof_file {
463 30     30 1 15257 my %config = ();
464 30 100       161 if (ref($_[0]) eq 'HASH') {%config = (%config, %{+shift})}
  2         6  
  2         25  
465              
466             =head3 OPTIONS
467              
468             I accepts all the options I does, so refer to
469             those first (up in this document).
470              
471             $language = langof_file(\%config, $path);
472              
473             I currently only reads the first 10,000 bytes of each
474             file.
475              
476             You can force an input encoding with C<< { encoding => 'ISO-8859-1' } >>
477             in the configuration hash.
478              
479             =cut
480              
481             # select max-size
482 30 50       146 my $maxsize = defined $config{'max-size'} ? $config{'max-size'}
483             : $default_maxsize;
484              
485 30         80 my @files = @_;
486 30         64 my $text = '';
487              
488 30         94 for my $file (@files) {
489             #-r and -e or next;
490 31 100       100 if (exists($config{encoding})) {
491 1 50   1   46 open(FILE, "<:encoding($config{encoding})", $file) or next;
  1         11  
  1         2  
  1         8  
492             } else {
493 30 50       1618 open(FILE, "<:utf8", $file) or next;
494             }
495 31         16116 local $/ = \$maxsize;
496 31         1999 $text .= ;
497 31         644 close(FILE);
498             }
499              
500 30         137 return langof(\%config,$text);
501             }
502              
503             =head2 confidence
504              
505             After getting the results into an array, its first element is the most probable
506             language. That doesn't mean it is very probable or not.
507              
508             You can find more about the likeliness of the results to be accurate by
509             computing its confidence level.
510              
511             use Lingua::Identify qw/:language_identification/;
512             my @results = langof($text);
513             my $confidence_level = confidence(@results);
514             # $confidence_level now holds a value between 0.5 and 1; the higher that
515             # value, the more accurate the results seem to be
516              
517             The formula used is pretty simple: p1 / (p1 + p2) , where p1 is the
518             probability of the most likely language and p2 is the probability of
519             the language which came in second. A couple of examples to illustrate
520             this:
521              
522             English 50% Portuguese 10% ...
523              
524             confidence level: 50 / (50 + 10) = 0.83
525              
526             Another example:
527              
528             Spanish 30% Portuguese 10% ...
529              
530             confidence level: 30 / (25 + 30) = 0.55
531              
532             French 10% German 5% ...
533              
534             confidence level: 10 / (10 + 5) = 0.67
535              
536             As you can see, the first example is probably the most accurate one.
537             Are there any doubts? The English language has five times the
538             probability of the second language.
539              
540             The second example is a bit more tricky. 55% confidence. The
541             confidence level is always above 50%, for obvious reasons. 55% doesn't
542             make anyone confident in the results, and one shouldn't be, with
543             results such as these.
544              
545             Notice the third example. The confidence level goes up to 67%, but the
546             probability of French is of mere 10%. So what? It's twice as much as
547             the second language. The low probability may well be caused by a great
548             number of languages in play.
549              
550             =cut
551              
552             sub confidence {
553 60 100 66 60 1 87608 defined $_[1] and $_[1] or return 0;
554 57 100 66     452 defined $_[3] and $_[3] or return 1;
555 56         408 $_[1] / ($_[1] + $_[3]);
556             }
557              
558             =head2 get_all_methods
559              
560             Returns a list comprised of all the available methods for language
561             identification.
562              
563             =cut
564              
565             sub get_all_methods {
566 1     1 1 27 qw/smallwords
567             prefixes1 prefixes2 prefixes3 prefixes4
568             suffixes1 suffixes2 suffixes3 suffixes4
569             ngrams1 ngrams2 ngrams3 ngrams4/
570             }
571              
572             =head1 LANGUAGE IDENTIFICATION IN GENERAL
573              
574             Language identification is based in patterns.
575              
576             In order to identify the language a given text is written in, we repeat a given
577             process for each active language (see section LANGUAGES MANIPULATION); in that
578             process, we look for common patterns of that language. Those patterns can be
579             prefixes, suffixes, common words, ngrams or even sequences of words.
580              
581             After repeating the process for each language, the total score for each of them
582             is then used to compute the probability (in percentage) for each language to be
583             the one of that text.
584              
585             =cut
586              
587             sub _langof_by_method {
588 229     229   673 my ($method, $elements, $text) = @_;
589 229         280 my (%result, $total);
590              
591 229         569 for my $language (get_active_languages()) {
592 5903         6203 for (keys %{$languages{$method}{$language}}) {
  5903         40692  
593 116140 100       245400 if (exists $$elements{$_}) {
594 15746         41310 $result{$language} +=
595             $$elements{$_} * ${languages{$method}{$language}{$_}};
596 15746         36930 $total +=
597             $$elements{$_} * ${languages{$method}{$language}{$_}};
598             }
599             }
600             }
601              
602 3154 50       7862 my @result = (
603 9336         12587 map { ( $_, ($total ? $result{$_} / $total : 0)) }
604 229         1920 sort { $result{$b} <=> $result{$a} } keys %result
605             );
606              
607 229 50       15647 return wantarray ? @result : $result[0];
608             }
609              
610             =head1 METHODS OF LANGUAGE IDENTIFICATION
611              
612             C currently comprises four different ways for language
613             identification, in a total of thirteen variations of those.
614              
615             The available methods are the following: B, B,
616             B, B, B, B, B,
617             B, B, B, B, B and B.
618              
619             Here's a more detailed explanation of each of those ways and those methods
620              
621             =head2 Small Word Technique - B
622              
623             The "Small Word Technique" searches the text for the most common words of each
624             active language. These words are usually articles, pronouns, etc, which happen
625             to be (usually) the shortest words of the language; hence, the method name.
626              
627             This is usually a good method for big texts, especially if you happen to have
628             few languages active.
629              
630             =cut
631              
632             sub _langof_by_word_method {
633 58     58   533 my ($method, $text) = @_;
634              
635             sub _words_count {
636 58     58   150 my ($words, $text) = @_;
637 58         19932 for my $word (split /[\s\n]+/, $text) {
638 17437         32011 $words->{$word}++
639             }
640             }
641              
642 58         101 my %words;
643 58         218 _words_count(\%words, $text);
644 58         1663 return _langof_by_method($method, \%words, $text);
645             }
646              
647             =head2 Prefix Analysis - B, B, B, B
648              
649             This method analyses text for the common prefixes of each active language.
650              
651             The methods are, respectively, for prefixes of size 1, 2, 3 and 4.
652              
653             =cut
654              
655             sub _langof_by_prefix_method {
656 7     7   7247 use Text::Affixes;
  7         39883  
  7         1347  
657              
658 56     56   329 (my $method = shift) =~ /^prefixes(\d)$/;
659 56         289 my $text = shift;
660              
661 56         565 my $prefixes = get_prefixes( {min => $1, max => $1}, $text);
662              
663 56         87839 return _langof_by_method($method, $$prefixes{$1}, $text);
664             }
665              
666             =head2 Suffix Analysis - B, B, B, B
667              
668             Similar to the Prefix Analysis (see above), but instead analysing common
669             suffixes.
670              
671             The methods are, respectively, for suffixes of size 1, 2, 3 and 4.
672              
673             =cut
674              
675             sub _langof_by_suffix_method {
676 7     7   58 use Text::Affixes;
  7         16  
  7         1140  
677              
678 59     59   352 (my $method = shift) =~ /^suffixes(\d)$/;
679 59         315 my $text = shift;
680              
681 59         570 my $suffixes = get_suffixes({min => $1, max => $1}, $text);
682              
683 59         83750 return _langof_by_method($method, $$suffixes{$1}, $text);
684             }
685              
686             ###
687              
688             # Have you seen my brother? He's a two line long comment. I think he
689             # might be lost... :-\ Me and my father have been looking for him for
690             # some time now :-/
691              
692             ###
693              
694             =head2 Ngram Categorization - B, B, B, B
695              
696             Ngrams are sequences of tokens. You can think of them as syllables, but they
697             are also more than that, as they are not only comprised by characters, but also
698             by spaces (delimiting or separating words).
699              
700             Ngrams are a very good way for identifying languages, given that the most
701             common ones of each language are not generally very common in others.
702              
703             This is usually the best method for small amounts of text or too many active
704             languages.
705              
706             The methods are, respectively, for ngrams of size 1, 2, 3 and 4.
707              
708             =cut
709              
710             sub _langof_by_ngram_method {
711 7     7   6738 use Text::Ngram qw(ngram_counts);
  7         25721  
  7         7372  
712              
713 56     56   323 (my $method = shift) =~ /^ngrams([1-4])$/;
714 56         218 my $text = shift;
715              
716 56         484 my $ngrams = ngram_counts( {spaces => 0}, $text, $1);
717              
718 56         276763 return _langof_by_method($method, $ngrams, $text);
719             }
720              
721             =head1 LANGUAGE MANIPULATION
722              
723             When trying to perform language identification, C works not with
724             all available languages, but instead with the ones that are active.
725              
726             By default, all available languages are active, but that can be changed by the
727             user.
728              
729             For your convenience, several methods regarding language manipulation were
730             created. In order to use them, load the module with the tag
731             :language_manipulation.
732              
733             These methods work with the two letters code for languages.
734              
735             =over 6
736              
737             =item B
738              
739             Activate a language
740              
741             activate_language('en');
742              
743             # or
744              
745             activate_language($_) for get_all_languages();
746              
747             =cut
748              
749             sub activate_language {
750 1 50   1 1 10 unless (grep { $_ eq $_[0] } @active_languages) {
  0         0  
751 1         4 push @active_languages, $_[0];
752             }
753 1         7 return @active_languages;
754             }
755              
756             =item B
757              
758             Activates all languages
759              
760             activate_all_languages();
761              
762             =cut
763              
764             sub activate_all_languages {
765 2     2 1 7 @active_languages = get_all_languages();
766 2         17 return @active_languages;
767             }
768              
769             =item B
770              
771             Deactivates a language
772              
773             deactivate_language('en');
774              
775             =cut
776              
777             sub deactivate_language {
778 1     1 1 3 @active_languages = grep { ! ($_ eq $_[0]) } @active_languages;
  26         39  
779 1         15 return @active_languages;
780             }
781              
782             =item B
783              
784             Deactivates all languages
785              
786             deactivate_all_languages();
787              
788             =cut
789              
790             sub deactivate_all_languages {
791 3     3 1 16 @active_languages = ();
792 3         13 return @active_languages;
793             }
794              
795             =item B
796              
797             Returns the names of all available languages
798              
799             my @all_languages = get_all_languages();
800              
801             =cut
802              
803             sub get_all_languages {
804 54     54 1 843 return @all_languages;
805             }
806              
807             =item B
808              
809             Returns the names of all active languages
810              
811             my @active_languages = get_active_languages();
812              
813             =cut
814              
815             sub get_active_languages {
816 299     299 1 2568 return @active_languages;
817             }
818              
819             =item B
820              
821             Returns the names of all inactive languages
822              
823             my @active_languages = get_inactive_languages();
824              
825             =cut
826              
827             sub get_inactive_languages {
828 2     2 1 702 return grep { ! is_active($_) } get_all_languages();
  52         78  
829             }
830              
831             =item B
832              
833             Returns the name of the language if it is active, an empty list otherwise
834              
835             if (is_active('en')) {
836             # YOUR CODE HERE
837             }
838              
839             =cut
840              
841             sub is_active {
842 54     54 1 75 return grep { $_ eq $_[0] } get_active_languages();
  676         1749  
843             }
844              
845             =item B
846              
847             Returns the name of the language if it exists, an empty list otherwise
848              
849             if (is_valid_language('en')) {
850             # YOUR CODE HERE
851             }
852              
853             =cut
854              
855             sub is_valid_language {
856 36     36 1 15256 return grep { $_ eq $_[0] } get_all_languages();
  936         1671  
857             }
858              
859             =item B
860              
861             Sets the active languages
862              
863             set_active_languages('en', 'pt');
864              
865             # or
866              
867             set_active_languages(get_all_languages());
868              
869             =cut
870              
871             sub set_active_languages {
872 3     3 1 10 @active_languages = grep { is_valid_language($_) } @_;
  6         20  
873 3         28 return @active_languages;
874             }
875              
876             =item B
877              
878             Given the two letter tag of a language, returns its name
879              
880             my $language_name = name_of('pt');
881              
882             =cut
883              
884             sub name_of {
885 1   50 1 1 5 my $tag = shift || return undef;
886 1         17 return $languages{_names}{$tag};
887             }
888              
889             =back
890              
891             =cut
892              
893             1;
894             __END__