File Coverage

blib/lib/Lingua/Identify.pm

Criterion	Covered	Total	%
statement	161	165	97.5
branch	49	62	79.0
condition	13	20	65.0
subroutine	33	33	100.0
pod	15	15	100.0
total	271	295	91.8

line	stmt	bran	cond	sub	pod	time	code
1							package Lingua::Identify;
2
3	7			7		164532	use 5.006;
	7					28
	7					278
4	7			7		47	use strict;
	7					13
	7					259
5	7			7		35	use warnings;
	7					18
	7					222
6
7	7			7		5050	use utf8;
	7					53
	7					39
8	7			7		241	use base 'Exporter';
	7					14
	7					2837
9
10							our %EXPORT_TAGS =
11							(
12							all => [ qw(
13							langof
14							langof_file
15							confidence
16							get_all_methods
17							activate_all_languages
18							deactivate_all_languages
19							get_all_languages
20							get_active_languages
21							get_inactive_languages
22							is_active
23							is_valid_language
24							activate_language
25							deactivate_language
26							set_active_languages
27							name_of
28							)
29							],
30							language_identification => [ qw(
31							langof
32							langof_file
33							confidence
34							get_all_methods
35							)
36							],
37
38							language_manipulation => [ qw(
39							activate_all_languages
40							deactivate_all_languages
41							get_all_languages
42							get_active_languages
43							get_inactive_languages
44							is_active
45							is_valid_language
46							activate_language
47							deactivate_language
48							set_active_languages
49							name_of
50							)
51							],
52							);
53
54							our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
55							our @EXPORT = qw();
56
57							our $VERSION = '0.56';
58
59
60							# DEFAULT VALUES #
61							our %default_methods = qw/smallwords 1.3 prefixes2 1.5 suffixes3 1.5 ngrams3 1.2/;
62							my $default_maxsize = 1_000_000;
63							my %default_extractfrom = qw/head 1/;
64
65							=head1 NAME
66
67							Lingua::Identify - Language identification
68
69							=head1 SYNOPSIS
70
71							use Lingua::Identify qw(:language_identification);
72							$a = langof($textstring); # gives the most probable language
73
74							or the complete way:
75
76							@a = langof($textstring); # gives pairs of languages / probabilities
77							# sorted from most to least probable
78
79							%a = langof($textstring); # gives a hash of language / probability
80
81							or the expert way (see section OPTIONS, under HOW TO PERFORM IDENTIFICATION)
82
83							$a = langof( { method => [qw/smallwords prefix2 suffix2/] }, $text);
84
85							$a = langof( { 'max-size' => 3_000_000 }, $text);
86
87							$a = langof( { 'extract_from' => ( 'head' => 1, 'tail' => 2)}, $text);
88
89							=head1 DESCRIPTION
90
91							B
92
93							C identifies the language a given string or file is
94							written in.
95
96							See section WHY LINGUA::IDENTIFY for a list of C's strong
97							points.
98
99							See section KNOWN LANGUAGES for a list of available languages and HOW TO
100							PERFORM IDENTIFICATION to know how to really use this module.
101
102							If you're in a hurry, jump to section EXAMPLES, way down below.
103
104							Also, don't forget to read the following section, IMPORTANT WARNING.
105
106							=head1 A WARNING ON THE ACCURACY OF LANGUAGE IDENTIFICATION METHODS
107
108							Take a word that exists in two different languages, take a good look at it and
109							answer this question: "What language does this word belong to?".
110
111							You can't give an answer like "Language X", right? You can only say it looks
112							like any of a set of languages.
113
114							Similarly, it isn't always easy to identify the language of a text if the only
115							two active languages are very similar.
116
117							Now that we've taken out of the way the warning that language identification
118							is not 100% accurate, please keep reading the documentation.
119
120							=head1 WHY LINGUA::IDENTIFY
121
122							You might be wondering why you should use Lingua::Identify instead of any other
123							tool for language identification.
124
125							Here's a list of Lingua::Identify's strong points:
126
127							=over 6
128
129							=item * it's free and it's open-source;
130
131							=item * it's portable (it's Perl, which means it will work in lots of different
132							platforms);
133
134							=item * unicode support;
135
136							=item * 4 different methods of language identification and growing (see
137							METHODS OF LANGUAGE IDENTIFICATION for more details on this one);
138
139							=item * it's a module, which means you can easily write your own application
140							(be it CGI, TK, whatever) around it;
141
142							=item * it comes with I, which means you don't actually need to
143							write your own application around it;
144
145							=item * it's flexible (at the moment, you can actually choose the
146							methods to use and their relevance, the max size of input to analyze
147							each time and which part(s) of the input to analyze)
148
149							=item * it supports big inputs (through the 'max-size' and
150							'extract_from' options)
151
152							=item * it's easy to deal with languages (you can activate and
153							deactivate the ones you choose whenever you want to, which can improve
154							your times and accuracy);
155
156							=item * it's maintained.
157
158							=back
159
160							=cut
161
162							# initialization
163
164							our (@all_languages,@active_languages,%languages,%regexen,@methods);
165							BEGIN {
166
167	7			7		6465	use Class::Factory::Util;
	7					4777
	7					42
168	7			7		1629	for ( Lingua::Identify->subclasses() ) {
169	189	100				17904	/^[A-Z][A-Z]$/ \|\| next;
170	182					13830	eval "require Lingua::Identify::$_ ;";
171	182	50				1578	if ($languages{_versions}{lc $_} < 0.02) {
172	0					0	for my $k (keys %languages) {
173	0	0				0	delete($languages{$k}{lc $_}) if exists $languages{$k}{lc $_};
174							}
175							}
176							}
177
178	7					87	@all_languages = @active_languages = keys %{$languages{_names}};
	7					140
179
180	7					14750	@methods = qw/smallwords/;
181
182							}
183
184							=head1 HOW TO PERFORM IDENTIFICATION
185
186							=head2 langof
187
188							To identify the language a given text is written in, use the I function.
189							To get a single value, do:
190
191							$language = langof($text);
192
193							To get the most probable language and also the percentage of its probability,
194							do:
195
196							($language, $probability) = langof($text);
197
198							If you want a hash where each active language is mapped into its percentage,
199							use this:
200
201							%languages = langof($text);
202
203							=cut
204
205							sub langof {
206	70			70	1	13705	my %config = ();
207	70	100				314	%config = (%config, %{+shift}) if ref($_[0]) eq 'HASH';
	41					167
208
209							=head3 OPTIONS
210
211							I can also be given some configuration parameters, in this way:
212
213							$language = langof(\%config, $text);
214
215							These parameters are detailed here:
216
217							=over 6
218
219							=item * B
220
221							When the size of the input exceeds the C'max-size', C analyzes
222							only the beginning of the file. You can specify which part of the file
223							is analyzed with the 'extract-from' option:
224
225							langof( { 'extract_from' => 'tail' } , $text );
226
227							Possible values are 'head' and 'tail' (for now).
228
229							You can also specify more than one part of the file, so that text is
230							extracted from those parts:
231
232							langof( { 'extract_from' => [ 'head', 'tail' ] } , $text );
233
234							(this will be useful when more than two possibilities exist)
235
236							You can also specify different values for each part of the file (not
237							necessarily for all of them:
238
239							langof( { 'extract_from' => { head => 40, tail => 60 } } , $text);
240
241							The line above, for instance, retrives 40% of the text from the
242							beginning and 60% from the end. Note, however, that those values are
243							not percentages. You'd get the same behavior with:
244
245							langof( { 'extract_from' => { head => 80, tail => 120 } } , $text);
246
247							The percentages would be the same.
248
249							=item * B
250
251							By default, C analyzes only 1,000,000 bytes. You can specify
252							how many bytes (at the most) can be analyzed (if not enough exist, the
253							whole input is still analyzed).
254
255							langof( { 'max-size' => 2000 }, $text);
256
257							If you want all the text to be analyzed, set max-size to 0:
258
259							langof( { 'max-size' => 0 }, $text);
260
261							See also C.
262
263							=item * B
264
265							You can choose which method or methods to use, and also the relevance of each of
266							them.
267
268							To choose a single method to use:
269
270							langof( {method => 'smallwords' }, $text);
271
272							To choose several methods:
273
274							langof( {method => [qw/prefixes2 suffixes2/]}, $text);
275
276							To choose several methods and give them different weight:
277
278							langof( {method => {smallwords => 0.5, ngrams3 => 1.5} }, $text);
279
280							To see the list of available methods, see section METHODS OF LANGUAGE
281							IDENTIFICATION.
282
283							If no method is specified, the configuration for this parameter is the
284							following (this might change in the future):
285
286							method => {
287							smallwords => 0.5,
288							prefixes2 => 1,
289							suffixes3 => 1,
290							ngrams3 => 1.3
291							};
292
293							=item * B
294
295							By default, C assumes C mode, but others are
296							available.
297
298							In C mode, instead of actually calculating anything,
299							C only does the preparation it has to and then
300							returns a bunch of information, including the list of the active
301							languages, the selected methods, etc. It also returns the text meant
302							to be analised.
303
304							Do be warned that, with I, the dummy mode still reads the
305							files, it simply doesn't calculate language.
306
307							langof( { 'mode' => 'dummy' }, $text);
308
309							This returns something like this:
310
311							{ 'methods' => { 'smallwords' => '0.5',
312							'prefixes2' => '1',
313							},
314							'config' => { 'mode' => 'dummy' },
315							'max-size' => 1000000,
316							'active-languages' => [ 'es', 'pt' ],
317							'text' => $text,
318							'mode' => 'dummy',
319							}
320
321							=back
322
323							=cut
324
325							# select the methods
326	70	100				612	my %methods = defined $config{'method'} ? _make_hash($config{'method'})
327							: %default_methods;
328
329							# select max-size
330	70	100				274	my $maxsize = defined $config{'max-size'} ? $config{'max-size'}
331							: $default_maxsize;
332
333							# get the text
334	70					464	my $text = join "\n", @_;
335	70	100				245	return wantarray ? () : undef unless $text;
		100
336
337							# this is the support for big files; if the input is bigger than the $maxsize, we act
338	68	100	100			1253	if ($maxsize < length $text && $maxsize != 0) {
339							# select extract_from
340	1	50				8	my %extractfrom = defined $config{'extract_from'} ? _make_hash($config{'extract_from'})
341							: %default_extractfrom;
342	1					3	my $total_weight = 0;
343	1					3	for (keys %extractfrom) {
344	1	50	33			7	if ($_ eq 'head' or $_ eq 'tail') {
345	1					4	$total_weight += $extractfrom{$_};
346	1					3	next;
347							}
348							else {
349	0					0	delete $extractfrom{$_};
350							}
351							}
352	1					14	for (keys %extractfrom) {
353	1					6	$extractfrom{$_} = $extractfrom{$_} / $total_weight;
354							}
355
356	1		50			5	$extractfrom{'head'} \|\|= 0;
357	1		50			7	$extractfrom{'tail'} \|\|= 0;
358
359	1					4	my $head = int $maxsize * $extractfrom{'head'};
360	1					3	my $tail = length($text) - $head - int $maxsize * $extractfrom{'tail'};
361	1					6	substr( $text, $head, $tail, '');
362							}
363
364							# dummy mode exits here
365	68		100			427	$config{'mode'} \|\|= 'normal';
366	68	100				258	if ($config{'mode'} eq 'dummy') {
367							return {
368	7					20	'method' => \%methods,
369							'max-size' => $maxsize,
370							'config' => \%config,
371							'active-languages' => [ sort (get_active_languages()) ],
372							'text' => $text,
373							'mode' => $config{'mode'},
374							};
375							}
376
377							# use the methods
378	61					98	my (%result, $total);
379	61					176	for (keys %methods) {
380	229					334	my %temp_result;
381
382	229	100				1661	if (/^smallwords$/) {
		100
		100
		50
383	58					233	%temp_result = _langof_by_word_method('smallwords', $text);
384							}
385							elsif (/^(prefixes[1-4])$/) {
386	56					236	%temp_result = _langof_by_prefix_method($1, $text);
387							}
388							elsif (/^(suffixes[1-4])$/) {
389	59					246	%temp_result = _langof_by_suffix_method($1, $text);
390							}
391							elsif (/^(ngrams[1-4])$/) {
392	56					232	%temp_result = _langof_by_ngram_method($1, $text);
393							}
394
395	229					3237	for my $l (keys %temp_result) {
396	3154					4667	my $temp = $temp_result{$l} * $methods{$_};
397	3154					3989	$result{$l} += $temp;
398	3154					5106	$total += $temp;
399							}
400							}
401
402							# report the results
403	1040	50				2595	my @result = (
404	3378					4152	map { ( $_, ($total ? $result{$_} / $total : 0)) }
405	61					400	sort { $result{$b} <=> $result{$a} } keys %result
406							);
407
408	61	100				1438	return wantarray ? @result : $result[0];
409							}
410
411							sub _make_hash {
412	11			11		21	my %hash;
413	11					21	my $temp = shift;
414	11					27	for (ref($temp)) {
415	11	100				52	if (/^HASH$/) {
		100
416	1					2	%hash = %{$temp};
	1					6
417							}
418							elsif (/^ARRAY$/) {
419	1					2	for (@{$temp}) {
	1					3
420	2					7	$hash{$_}++;
421							}
422							}
423							else {
424	9					32	$hash{$temp} = 1;
425							}
426							}
427	11					47	%hash;
428							}
429
430							=head2 langof_file
431
432							I works just like I, with the exception that it
433							reveives filenames instead of text. It reads these texts (if existing
434							and readable, of course) and parses its content.
435
436							Currently, I assumes the files are regular text. This may
437							change in the future and the files might be scanned to check their
438							filetype and then parsed to extract only their textual content (which
439							should be pretty useful so that you can perform language
440							identification, say, in HTML files, or PDFs).
441
442							To identify the language a file is written in:
443
444							$language = langof_file($path);
445
446							To get the most probable language and also the percentage of its probability,
447							do:
448
449							($language, $probability) = langof_file($path);
450
451							If you want a hash where each active language is mapped into its percentage,
452							use this:
453
454							%languages = langof_file($path);
455
456							If you pass more than one file to I, they will all be
457							read and their content merged and then parsed for language
458							identification.
459
460							=cut
461
462							sub langof_file {
463	30			30	1	15257	my %config = ();
464	30	100				161	if (ref($_[0]) eq 'HASH') {%config = (%config, %{+shift})}
	2					6
	2					25
465
466							=head3 OPTIONS
467
468							I accepts all the options I does, so refer to
469							those first (up in this document).
470
471							$language = langof_file(\%config, $path);
472
473							I currently only reads the first 10,000 bytes of each
474							file.
475
476							You can force an input encoding with C<< { encoding => 'ISO-8859-1' } >>
477							in the configuration hash.
478
479							=cut
480
481							# select max-size
482	30	50				146	my $maxsize = defined $config{'max-size'} ? $config{'max-size'}
483							: $default_maxsize;
484
485	30					80	my @files = @_;
486	30					64	my $text = '';
487
488	30					94	for my $file (@files) {
489							#-r and -e or next;
490	31	100				100	if (exists($config{encoding})) {
491	1	50		1		46	open(FILE, "<:encoding($config{encoding})", $file) or next;
	1					11
	1					2
	1					8
492							} else {
493	30	50				1618	open(FILE, "<:utf8", $file) or next;
494							}
495	31					16116	local $/ = \$maxsize;
496	31					1999	$text .= ;
497	31					644	close(FILE);
498							}
499
500	30					137	return langof(\%config,$text);
501							}
502
503							=head2 confidence
504
505							After getting the results into an array, its first element is the most probable
506							language. That doesn't mean it is very probable or not.
507
508							You can find more about the likeliness of the results to be accurate by
509							computing its confidence level.
510
511							use Lingua::Identify qw/:language_identification/;
512							my @results = langof($text);
513							my $confidence_level = confidence(@results);
514							# $confidence_level now holds a value between 0.5 and 1; the higher that
515							# value, the more accurate the results seem to be
516
517							The formula used is pretty simple: p1 / (p1 + p2) , where p1 is the
518							probability of the most likely language and p2 is the probability of
519							the language which came in second. A couple of examples to illustrate
520							this:
521
522							English 50% Portuguese 10% ...
523
524							confidence level: 50 / (50 + 10) = 0.83
525
526							Another example:
527
528							Spanish 30% Portuguese 10% ...
529
530							confidence level: 30 / (25 + 30) = 0.55
531
532							French 10% German 5% ...
533
534							confidence level: 10 / (10 + 5) = 0.67
535
536							As you can see, the first example is probably the most accurate one.
537							Are there any doubts? The English language has five times the
538							probability of the second language.
539
540							The second example is a bit more tricky. 55% confidence. The
541							confidence level is always above 50%, for obvious reasons. 55% doesn't
542							make anyone confident in the results, and one shouldn't be, with
543							results such as these.
544
545							Notice the third example. The confidence level goes up to 67%, but the
546							probability of French is of mere 10%. So what? It's twice as much as
547							the second language. The low probability may well be caused by a great
548							number of languages in play.
549
550							=cut
551
552							sub confidence {
553	60	100	66	60	1	87608	defined $_[1] and $_[1] or return 0;
554	57	100	66			452	defined $_[3] and $_[3] or return 1;
555	56					408	$_[1] / ($_[1] + $_[3]);
556							}
557
558							=head2 get_all_methods
559
560							Returns a list comprised of all the available methods for language
561							identification.
562
563							=cut
564
565							sub get_all_methods {
566	1			1	1	27	qw/smallwords
567							prefixes1 prefixes2 prefixes3 prefixes4
568							suffixes1 suffixes2 suffixes3 suffixes4
569							ngrams1 ngrams2 ngrams3 ngrams4/
570							}
571
572							=head1 LANGUAGE IDENTIFICATION IN GENERAL
573
574							Language identification is based in patterns.
575
576							In order to identify the language a given text is written in, we repeat a given
577							process for each active language (see section LANGUAGES MANIPULATION); in that
578							process, we look for common patterns of that language. Those patterns can be
579							prefixes, suffixes, common words, ngrams or even sequences of words.
580
581							After repeating the process for each language, the total score for each of them
582							is then used to compute the probability (in percentage) for each language to be
583							the one of that text.
584
585							=cut
586
587							sub _langof_by_method {
588	229			229		673	my ($method, $elements, $text) = @_;
589	229					280	my (%result, $total);
590
591	229					569	for my $language (get_active_languages()) {
592	5903					6203	for (keys %{$languages{$method}{$language}}) {
	5903					40692
593	116140	100				245400	if (exists $$elements{$_}) {
594	15746					41310	$result{$language} +=
595							$$elements{$_} * ${languages{$method}{$language}{$_}};
596	15746					36930	$total +=
597							$$elements{$_} * ${languages{$method}{$language}{$_}};
598							}
599							}
600							}
601
602	3154	50				7862	my @result = (
603	9336					12587	map { ( $_, ($total ? $result{$_} / $total : 0)) }
604	229					1920	sort { $result{$b} <=> $result{$a} } keys %result
605							);
606
607	229	50				15647	return wantarray ? @result : $result[0];
608							}
609
610							=head1 METHODS OF LANGUAGE IDENTIFICATION
611
612							C currently comprises four different ways for language
613							identification, in a total of thirteen variations of those.
614
615							The available methods are the following: B, B,
616							B, B, B, B, B,
617							B, B, B, B, B and B.
618
619							Here's a more detailed explanation of each of those ways and those methods
620
621							=head2 Small Word Technique - B
622
623							The "Small Word Technique" searches the text for the most common words of each
624							active language. These words are usually articles, pronouns, etc, which happen
625							to be (usually) the shortest words of the language; hence, the method name.
626
627							This is usually a good method for big texts, especially if you happen to have
628							few languages active.
629
630							=cut
631
632							sub _langof_by_word_method {
633	58			58		533	my ($method, $text) = @_;
634
635							sub _words_count {
636	58			58		150	my ($words, $text) = @_;
637	58					19932	for my $word (split /[\s\n]+/, $text) {
638	17437					32011	$words->{$word}++
639							}
640							}
641
642	58					101	my %words;
643	58					218	_words_count(\%words, $text);
644	58					1663	return _langof_by_method($method, \%words, $text);
645							}
646
647							=head2 Prefix Analysis - B, B, B, B
648
649							This method analyses text for the common prefixes of each active language.
650
651							The methods are, respectively, for prefixes of size 1, 2, 3 and 4.
652
653							=cut
654
655							sub _langof_by_prefix_method {
656	7			7		7247	use Text::Affixes;
	7					39883
	7					1347
657
658	56			56		329	(my $method = shift) =~ /^prefixes(\d)$/;
659	56					289	my $text = shift;
660
661	56					565	my $prefixes = get_prefixes( {min => $1, max => $1}, $text);
662
663	56					87839	return _langof_by_method($method, $$prefixes{$1}, $text);
664							}
665
666							=head2 Suffix Analysis - B, B, B, B
667
668							Similar to the Prefix Analysis (see above), but instead analysing common
669							suffixes.
670
671							The methods are, respectively, for suffixes of size 1, 2, 3 and 4.
672
673							=cut
674
675							sub _langof_by_suffix_method {
676	7			7		58	use Text::Affixes;
	7					16
	7					1140
677
678	59			59		352	(my $method = shift) =~ /^suffixes(\d)$/;
679	59					315	my $text = shift;
680
681	59					570	my $suffixes = get_suffixes({min => $1, max => $1}, $text);
682
683	59					83750	return _langof_by_method($method, $$suffixes{$1}, $text);
684							}
685
686							###
687
688							# Have you seen my brother? He's a two line long comment. I think he
689							# might be lost... :-\ Me and my father have been looking for him for
690							# some time now :-/
691
692							###
693
694							=head2 Ngram Categorization - B, B, B, B
695
696							Ngrams are sequences of tokens. You can think of them as syllables, but they
697							are also more than that, as they are not only comprised by characters, but also
698							by spaces (delimiting or separating words).
699
700							Ngrams are a very good way for identifying languages, given that the most
701							common ones of each language are not generally very common in others.
702
703							This is usually the best method for small amounts of text or too many active
704							languages.
705
706							The methods are, respectively, for ngrams of size 1, 2, 3 and 4.
707
708							=cut
709
710							sub _langof_by_ngram_method {
711	7			7		6738	use Text::Ngram qw(ngram_counts);
	7					25721
	7					7372
712
713	56			56		323	(my $method = shift) =~ /^ngrams([1-4])$/;
714	56					218	my $text = shift;
715
716	56					484	my $ngrams = ngram_counts( {spaces => 0}, $text, $1);
717
718	56					276763	return _langof_by_method($method, $ngrams, $text);
719							}
720
721							=head1 LANGUAGE MANIPULATION
722
723							When trying to perform language identification, C works not with
724							all available languages, but instead with the ones that are active.
725
726							By default, all available languages are active, but that can be changed by the
727							user.
728
729							For your convenience, several methods regarding language manipulation were
730							created. In order to use them, load the module with the tag
731							:language_manipulation.
732
733							These methods work with the two letters code for languages.
734
735							=over 6
736
737							=item B
738
739							Activate a language
740
741							activate_language('en');
742
743							# or
744
745							activate_language($_) for get_all_languages();
746
747							=cut
748
749							sub activate_language {
750	1	50		1	1	10	unless (grep { $_ eq $_[0] } @active_languages) {
	0					0
751	1					4	push @active_languages, $_[0];
752							}
753	1					7	return @active_languages;
754							}
755
756							=item B
757
758							Activates all languages
759
760							activate_all_languages();
761
762							=cut
763
764							sub activate_all_languages {
765	2			2	1	7	@active_languages = get_all_languages();
766	2					17	return @active_languages;
767							}
768
769							=item B
770
771							Deactivates a language
772
773							deactivate_language('en');
774
775							=cut
776
777							sub deactivate_language {
778	1			1	1	3	@active_languages = grep { ! ($_ eq $_[0]) } @active_languages;
	26					39
779	1					15	return @active_languages;
780							}
781
782							=item B
783
784							Deactivates all languages
785
786							deactivate_all_languages();
787
788							=cut
789
790							sub deactivate_all_languages {
791	3			3	1	16	@active_languages = ();
792	3					13	return @active_languages;
793							}
794
795							=item B
796
797							Returns the names of all available languages
798
799							my @all_languages = get_all_languages();
800
801							=cut
802
803							sub get_all_languages {
804	54			54	1	843	return @all_languages;
805							}
806
807							=item B
808
809							Returns the names of all active languages
810
811							my @active_languages = get_active_languages();
812
813							=cut
814
815							sub get_active_languages {
816	299			299	1	2568	return @active_languages;
817							}
818
819							=item B
820
821							Returns the names of all inactive languages
822
823							my @active_languages = get_inactive_languages();
824
825							=cut
826
827							sub get_inactive_languages {
828	2			2	1	702	return grep { ! is_active($_) } get_all_languages();
	52					78
829							}
830
831							=item B
832
833							Returns the name of the language if it is active, an empty list otherwise
834
835							if (is_active('en')) {
836							# YOUR CODE HERE
837							}
838
839							=cut
840
841							sub is_active {
842	54			54	1	75	return grep { $_ eq $_[0] } get_active_languages();
	676					1749
843							}
844
845							=item B
846
847							Returns the name of the language if it exists, an empty list otherwise
848
849							if (is_valid_language('en')) {
850							# YOUR CODE HERE
851							}
852
853							=cut
854
855							sub is_valid_language {
856	36			36	1	15256	return grep { $_ eq $_[0] } get_all_languages();
	936					1671
857							}
858
859							=item B
860
861							Sets the active languages
862
863							set_active_languages('en', 'pt');
864
865							# or
866
867							set_active_languages(get_all_languages());
868
869							=cut
870
871							sub set_active_languages {
872	3			3	1	10	@active_languages = grep { is_valid_language($_) } @_;
	6					20
873	3					28	return @active_languages;
874							}
875
876							=item B
877
878							Given the two letter tag of a language, returns its name
879
880							my $language_name = name_of('pt');
881
882							=cut
883
884							sub name_of {
885	1		50	1	1	5	my $tag = shift \|\| return undef;
886	1					17	return $languages{_names}{$tag};
887							}
888
889							=back
890
891							=cut
892
893							1;
894							__END__