File Coverage

blib/lib/Lingua/Identify.pm

Criterion	Covered	Total	%
statement	177	181	97.7
branch	58	74	78.3
condition	12	20	60.0
subroutine	34	34	100.0
pod	15	15	100.0
total	296	324	91.3

line	stmt	bran	cond	sub	pod	time	code
1							package Lingua::Identify;
2
3	7			7		168609	use 5.006;
	7					23
	7					260
4	7			7		38	use strict;
	7					12
	7					210
5	7			7		38	use warnings;
	7					16
	7					193
6
7	7			7		4928	use utf8;
	7					49
	7					39
8	7			7		241	use base 'Exporter';
	7					14
	7					3187
9
10							our %EXPORT_TAGS =
11							(
12							all => [ qw(
13							langof
14							langof_file
15							confidence
16							get_all_methods
17							activate_all_languages
18							deactivate_all_languages
19							get_all_languages
20							get_active_languages
21							get_inactive_languages
22							is_active
23							is_valid_language
24							activate_language
25							deactivate_language
26							set_active_languages
27							name_of
28							)
29							],
30							language_identification => [ qw(
31							langof
32							langof_file
33							confidence
34							get_all_methods
35							)
36							],
37
38							language_manipulation => [ qw(
39							activate_all_languages
40							deactivate_all_languages
41							get_all_languages
42							get_active_languages
43							get_inactive_languages
44							is_active
45							is_valid_language
46							activate_language
47							deactivate_language
48							set_active_languages
49							name_of
50							)
51							],
52							);
53
54							our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
55							our @EXPORT = qw();
56
57							our $VERSION = '0.57_1';
58
59
60							# DEFAULT VALUES #
61							our %default_methods = qw/smallwords 2 prefixes3 1 suffixes3 1 ngrams3 1 letters 0.5/;
62							my $default_maxsize = 1_000_000;
63							my %default_extractfrom = qw/head 1/;
64
65							=encoding utf8
66
67							=head1 NAME
68
69							Lingua::Identify - Language identification
70
71							=head1 SYNOPSIS
72
73							use Lingua::Identify qw(:language_identification);
74							$a = langof($textstring); # gives the most probable language
75
76							or the complete way:
77
78							@a = langof($textstring); # gives pairs of languages / probabilities
79							# sorted from most to least probable
80
81							%a = langof($textstring); # gives a hash of language / probability
82
83							or the expert way (see section OPTIONS, under HOW TO PERFORM IDENTIFICATION)
84
85							$a = langof( { method => [qw/smallwords prefix2 suffix2/] }, $text);
86
87							$a = langof( { 'max-size' => 3_000_000 }, $text);
88
89							$a = langof( { 'extract_from' => ( 'head' => 1, 'tail' => 2)}, $text);
90
91							=head1 DESCRIPTION
92
93							B
94
95							C identifies the language a given string or file is
96							written in.
97
98							See section WHY LINGUA::IDENTIFY for a list of C's strong
99							points.
100
101							See section KNOWN LANGUAGES for a list of available languages and HOW TO
102							PERFORM IDENTIFICATION to know how to really use this module.
103
104							If you're in a hurry, jump to section EXAMPLES, way down below.
105
106							Also, don't forget to read the following section, IMPORTANT WARNING.
107
108							=head1 A WARNING ON THE ACCURACY OF LANGUAGE IDENTIFICATION METHODS
109
110							Take a word that exists in two different languages, take a good look at it and
111							answer this question: "What language does this word belong to?".
112
113							You can't give an answer like "Language X", right? You can only say it looks
114							like any of a set of languages.
115
116							Similarly, it isn't always easy to identify the language of a text if the only
117							two active languages are very similar.
118
119							Now that we've taken out of the way the warning that language identification
120							is not 100% accurate, please keep reading the documentation.
121
122							=head1 WHY LINGUA::IDENTIFY
123
124							You might be wondering why you should use Lingua::Identify instead of any other
125							tool for language identification.
126
127							Here's a list of Lingua::Identify's strong points:
128
129							=over 6
130
131							=item * it's free and it's open-source;
132
133							=item * it's portable (it's Perl, which means it will work in lots of different
134							platforms);
135
136							=item * unicode support;
137
138							=item * 4 different methods of language identification and growing (see
139							METHODS OF LANGUAGE IDENTIFICATION for more details on this one);
140
141							=item * it's a module, which means you can easily write your own application
142							(be it CGI, TK, whatever) around it;
143
144							=item * it comes with I, which means you don't actually need to
145							write your own application around it;
146
147							=item * it's flexible (at the moment, you can actually choose the
148							methods to use and their relevance, the max size of input to analyze
149							each time and which part(s) of the input to analyze)
150
151							=item * it supports big inputs (through the 'max-size' and
152							'extract_from' options)
153
154							=item * it's easy to deal with languages (you can activate and
155							deactivate the ones you choose whenever you want to, which can improve
156							your times and accuracy);
157
158							=item * it's maintained.
159
160							=back
161
162							=cut
163
164							# initialization
165
166							our (@all_languages,@active_languages,%languages,%regexen,@methods);
167							BEGIN {
168
169	7			7		6391	use Class::Factory::Util;
	7					4797
	7					42
170	7			7		1992	for ( Lingua::Identify->subclasses() ) {
171	196	100				17969	/^[A-Z]{2}(?:_[A-Z]{2})?$/ \|\| next;
172	189					13787	eval "require Lingua::Identify::$_ ;";
173	189	50				1678	if ($languages{_versions}{lc $_} <= 0.02) {
174	0					0	for my $k (keys %languages) {
175	0	0				0	delete($languages{$k}{lc $_}) if exists $languages{$k}{lc $_};
176							}
177							}
178							}
179
180	7					94	@all_languages = @active_languages = keys %{$languages{_names}};
	7					267
181
182	7					14057	@methods = qw/smallwords/;
183
184							}
185
186							=head1 HOW TO PERFORM IDENTIFICATION
187
188							=head2 langof
189
190							To identify the language a given text is written in, use the I function.
191							To get a single value, do:
192
193							$language = langof($text);
194
195							To get the most probable language and also the percentage of its probability,
196							do:
197
198							($language, $probability) = langof($text);
199
200							If you want a hash where each active language is mapped into its percentage,
201							use this:
202
203							%languages = langof($text);
204
205							=cut
206
207							sub langof {
208	72			72	1	17213	my %config = ();
209	72	100				338	%config = (%config, %{+shift}) if ref($_[0]) eq 'HASH';
	42					133
210
211							=head3 OPTIONS
212
213							I can also be given some configuration parameters, in this way:
214
215							$language = langof(\%config, $text);
216
217							These parameters are detailed here:
218
219							=over 6
220
221							=item * B
222
223							When the size of the input exceeds the C'max-size', C analyzes
224							only the beginning of the file. You can specify which part of the file
225							is analyzed with the 'extract-from' option:
226
227							langof( { 'extract_from' => 'tail' } , $text );
228
229							Possible values are 'head' and 'tail' (for now).
230
231							You can also specify more than one part of the file, so that text is
232							extracted from those parts:
233
234							langof( { 'extract_from' => [ 'head', 'tail' ] } , $text );
235
236							(this will be useful when more than two possibilities exist)
237
238							You can also specify different values for each part of the file (not
239							necessarily for all of them:
240
241							langof( { 'extract_from' => { head => 40, tail => 60 } } , $text);
242
243							The line above, for instance, retrives 40% of the text from the
244							beginning and 60% from the end. Note, however, that those values are
245							not percentages. You'd get the same behavior with:
246
247							langof( { 'extract_from' => { head => 80, tail => 120 } } , $text);
248
249							The percentages would be the same.
250
251							=item * B
252
253							By default, C analyzes only 1,000,000 bytes. You can specify
254							how many bytes (at the most) can be analyzed (if not enough exist, the
255							whole input is still analyzed).
256
257							langof( { 'max-size' => 2000 }, $text);
258
259							If you want all the text to be analyzed, set max-size to 0:
260
261							langof( { 'max-size' => 0 }, $text);
262
263							See also C.
264
265							=item * B
266
267							You can choose which method or methods to use, and also the relevance of each of
268							them.
269
270							To choose a single method to use:
271
272							langof( {method => 'smallwords' }, $text);
273
274							To choose several methods:
275
276							langof( {method => [qw/prefixes2 suffixes2/]}, $text);
277
278							To choose several methods and give them different weight:
279
280							langof( {method => {smallwords => 0.5, ngrams3 => 1.5} }, $text);
281
282							To see the list of available methods, see section METHODS OF LANGUAGE
283							IDENTIFICATION.
284
285							If no method is specified, the configuration for this parameter is the
286							following (this might change in the future):
287
288							method => {
289							smallwords => 0.5,
290							prefixes2 => 1,
291							suffixes3 => 1,
292							ngrams3 => 1.3
293							};
294
295							=item * B
296
297							By default, C assumes C mode, but others are
298							available.
299
300							In C mode, instead of actually calculating anything,
301							C only does the preparation it has to and then
302							returns a bunch of information, including the list of the active
303							languages, the selected methods, etc. It also returns the text meant
304							to be analised.
305
306							Do be warned that, with I, the dummy mode still reads the
307							files, it simply doesn't calculate language.
308
309							langof( { 'mode' => 'dummy' }, $text);
310
311							This returns something like this:
312
313							{ 'methods' => { 'smallwords' => '0.5',
314							'prefixes2' => '1',
315							},
316							'config' => { 'mode' => 'dummy' },
317							'max-size' => 1000000,
318							'active-languages' => [ 'es', 'pt' ],
319							'text' => $text,
320							'mode' => 'dummy',
321							}
322
323							=back
324
325							=cut
326
327							# select the methods
328	72	100				682	my %methods = defined $config{'method'} ? _make_hash($config{'method'})
329							: %default_methods;
330
331							# select max-size
332	72	100				290	my $maxsize = defined $config{'max-size'} ? $config{'max-size'}
333							: $default_maxsize;
334
335							# get the text
336	72					452	my $text = join "\n", @_;
337	72	100				208	return wantarray ? () : undef unless $text;
		100
338
339							# this is the support for big files; if the input is bigger than the $maxsize, we act
340	70	100	100			986	if ($maxsize < length $text && $maxsize != 0) {
341							# select extract_from
342	1	50				9	my %extractfrom = defined $config{'extract_from'} ? _make_hash($config{'extract_from'})
343							: %default_extractfrom;
344	1					4	my $total_weight = 0;
345	1					5	for (keys %extractfrom) {
346	1	50	33			8	if ($_ eq 'head' or $_ eq 'tail') {
347	1					5	$total_weight += $extractfrom{$_};
348	1					3	next;
349							}
350							else {
351	0					0	delete $extractfrom{$_};
352							}
353							}
354	1					11	for (keys %extractfrom) {
355	1					7	$extractfrom{$_} = $extractfrom{$_} / $total_weight;
356							}
357
358	1		50			6	$extractfrom{'head'} \|\|= 0;
359	1		50			8	$extractfrom{'tail'} \|\|= 0;
360
361	1					5	my $head = int $maxsize * $extractfrom{'head'};
362	1					4	my $tail = length($text) - $head - int $maxsize * $extractfrom{'tail'};
363	1					7	substr( $text, $head, $tail, '');
364							}
365
366							# dummy mode exits here
367	70		100			430	$config{'mode'} \|\|= 'normal';
368	70	100				219	if ($config{'mode'} eq 'dummy') {
369							return {
370	7					25	'method' => \%methods,
371							'max-size' => $maxsize,
372							'config' => \%config,
373							'active-languages' => [ sort (get_active_languages()) ],
374							'text' => $text,
375							'mode' => $config{'mode'},
376							};
377							}
378
379							# use the methods
380	63					75	my (%result, $total);
381	63					188	for (keys %methods) {
382	295					449	my %temp_result;
383
384	295	100				2891	if (/^letters$/) {
		100
		100
		100
		50
385	58					213	%temp_result = _langof_by_used_letters('letters', $text);
386							}
387							elsif (/^smallwords$/) {
388	60					294	%temp_result = _langof_by_word_method('smallwords', $text);
389							}
390							elsif (/^(prefixes[1-4])$/) {
391	58					249	%temp_result = _langof_by_prefix_method($1, $text);
392							}
393							elsif (/^(suffixes[1-4])$/) {
394	61					277	%temp_result = _langof_by_suffix_method($1, $text);
395							}
396							elsif (/^(ngrams[1-4])$/) {
397	58					250	%temp_result = _langof_by_ngram_method($1, $text);
398							}
399
400	295					3729	for my $l (keys %temp_result) {
401	4733					7726	my $temp = $temp_result{$l} * $methods{$_};
402	4733					5684	$result{$l} += $temp;
403	4733					7054	$total += $temp;
404							}
405							}
406
407							# report the results
408	1587	50				3516	my @result = (
409	5600					6829	map { ( $_, ($total ? $result{$_} / $total : 0)) }
410	63					434	sort { $result{$b} <=> $result{$a} } keys %result
411							);
412
413	63	100				1705	return wantarray ? @result : $result[0];
414							}
415
416							sub _make_hash {
417	11			11		17	my %hash;
418	11					20	my $temp = shift;
419	11					29	for (ref($temp)) {
420	11	100				51	if (/^HASH$/) {
		100
421	1					2	%hash = %{$temp};
	1					7
422							}
423							elsif (/^ARRAY$/) {
424	1					2	for (@{$temp}) {
	1					3
425	2					7	$hash{$_}++;
426							}
427							}
428							else {
429	9					43	$hash{$temp} = 1;
430							}
431							}
432	11					51	%hash;
433							}
434
435							=head2 langof_file
436
437							I works just like I, with the exception that it
438							reveives filenames instead of text. It reads these texts (if existing
439							and readable, of course) and parses its content.
440
441							Currently, I assumes the files are regular text. This may
442							change in the future and the files might be scanned to check their
443							filetype and then parsed to extract only their textual content (which
444							should be pretty useful so that you can perform language
445							identification, say, in HTML files, or PDFs).
446
447							To identify the language a file is written in:
448
449							$language = langof_file($path);
450
451							To get the most probable language and also the percentage of its probability,
452							do:
453
454							($language, $probability) = langof_file($path);
455
456							If you want a hash where each active language is mapped into its percentage,
457							use this:
458
459							%languages = langof_file($path);
460
461							If you pass more than one file to I, they will all be
462							read and their content merged and then parsed for language
463							identification.
464
465							=cut
466
467							sub langof_file {
468	31			31	1	21768	my %config = ();
469	31	100				156	if (ref($_[0]) eq 'HASH') {%config = (%config, %{+shift})}
	2					5
	2					11
470
471							=head3 OPTIONS
472
473							I accepts all the options I does, so refer to
474							those first (up in this document).
475
476							$language = langof_file(\%config, $path);
477
478							I currently only reads the first 10,000 bytes of each
479							file.
480
481							You can force an input encoding with C<< { encoding => 'ISO-8859-1' } >>
482							in the configuration hash.
483
484							=cut
485
486							# select max-size
487	31	50				142	my $maxsize = defined $config{'max-size'} ? $config{'max-size'}
488							: $default_maxsize;
489
490	31					97	my @files = @_;
491	31					74	my $text = '';
492
493	31					72	for my $file (@files) {
494							#-r and -e or next;
495	32	100				114	if (exists($config{encoding})) {
496	1	50		1		45	open(FILE, "<:encoding($config{encoding})", $file) or next;
	1					9
	1					3
	1					8
497							} else {
498	31	50				1739	open(FILE, "<:utf8", $file) or next;
499							}
500	32					13707	local $/ = \$maxsize;
501	32					1980	$text .= ;
502	32					812	close(FILE);
503							}
504
505	31					134	return langof(\%config,$text);
506							}
507
508							=head2 confidence
509
510							After getting the results into an array, its first element is the most probable
511							language. That doesn't mean it is very probable or not.
512
513							You can find more about the likeliness of the results to be accurate by
514							computing its confidence level.
515
516							use Lingua::Identify qw/:language_identification/;
517							my @results = langof($text);
518							my $confidence_level = confidence(@results);
519							# $confidence_level now holds a value between 0.5 and 1; the higher that
520							# value, the more accurate the results seem to be
521
522							The formula used is pretty simple: p1 / (p1 + p2) , where p1 is the
523							probability of the most likely language and p2 is the probability of
524							the language which came in second. A couple of examples to illustrate
525							this:
526
527							English 50% Portuguese 10% ...
528
529							confidence level: 50 / (50 + 10) = 0.83
530
531							Another example:
532
533							Spanish 30% Portuguese 10% ...
534
535							confidence level: 30 / (25 + 30) = 0.55
536
537							French 10% German 5% ...
538
539							confidence level: 10 / (10 + 5) = 0.67
540
541							As you can see, the first example is probably the most accurate one.
542							Are there any doubts? The English language has five times the
543							probability of the second language.
544
545							The second example is a bit more tricky. 55% confidence. The
546							confidence level is always above 50%, for obvious reasons. 55% doesn't
547							make anyone confident in the results, and one shouldn't be, with
548							results such as these.
549
550							Notice the third example. The confidence level goes up to 67%, but the
551							probability of French is of mere 10%. So what? It's twice as much as
552							the second language. The low probability may well be caused by a great
553							number of languages in play.
554
555							=cut
556
557							sub confidence {
558	62	100	66	62	1	90196	defined $_[1] and $_[1] or return 0;
559	59	100	33			427	defined $_[3] and $_[3] or return 1;
560	58					359	$_[1] / ($_[1] + $_[3]);
561							}
562
563							=head2 get_all_methods
564
565							Returns a list comprised of all the available methods for language
566							identification.
567
568							=cut
569
570							sub get_all_methods {
571	1			1	1	28	qw/smallwords
572							prefixes1 prefixes2 prefixes3 prefixes4
573							suffixes1 suffixes2 suffixes3 suffixes4
574							ngrams1 ngrams2 ngrams3 ngrams4/
575							}
576
577							=head1 LANGUAGE IDENTIFICATION IN GENERAL
578
579							Language identification is based in patterns.
580
581							In order to identify the language a given text is written in, we repeat a given
582							process for each active language (see section LANGUAGES MANIPULATION); in that
583							process, we look for common patterns of that language. Those patterns can be
584							prefixes, suffixes, common words, ngrams or even sequences of words.
585
586							After repeating the process for each language, the total score for each of them
587							is then used to compute the probability (in percentage) for each language to be
588							the one of that text.
589
590							=cut
591
592							sub _langof_by_method {
593	237			237		900	my ($method, $elements, $text) = @_;
594	237					341	my (%result, $total);
595
596	237					703	for my $language (get_active_languages()) {
597	6346					6932	for (keys %{$languages{$method}{$language}}) {
	6346					52534
598	173900	100				321310	if (exists $$elements{$_}) {
599	19274					50134	$result{$language} +=
600							$$elements{$_} * ${languages{$method}{$language}{$_}};
601	19274					43806	$total +=
602							$$elements{$_} * ${languages{$method}{$language}{$_}};
603							}
604							}
605							}
606
607	3167	50				7917	my @result = (
608	9268					11927	map { ( $_, ($total ? $result{$_} / $total : 0)) }
609	237					2047	sort { $result{$b} <=> $result{$a} } keys %result
610							);
611
612	237	50				11136	return wantarray ? @result : $result[0];
613							}
614
615							=head1 METHODS OF LANGUAGE IDENTIFICATION
616
617							C currently comprises four different ways for language
618							identification, in a total of thirteen variations of those.
619
620							The available methods are the following: B, B,
621							B, B, B, B, B,
622							B, B, B, B, B and B.
623
624							Here's a more detailed explanation of each of those ways and those methods
625
626							=head2 Small Word Technique - B
627
628							The "Small Word Technique" searches the text for the most common words of each
629							active language. These words are usually articles, pronouns, etc, which happen
630							to be (usually) the shortest words of the language; hence, the method name.
631
632							This is usually a good method for big texts, especially if you happen to have
633							few languages active.
634
635							=cut
636
637							sub _langof_by_word_method {
638	60			60		291	my ($method, $text) = @_;
639
640							sub _words_count {
641	60			60		152	my ($words, $text) = @_;
642	60					20186	for my $word (split /[\s\n]+/, $text) {
643	17632					33905	$words->{$word}++
644							}
645							}
646
647	60					154	my %words;
648	60					262	_words_count(\%words, $text);
649	60					1593	return _langof_by_method($method, \%words, $text);
650							}
651
652							=head2 Used Letters - B
653
654							In this technique used letters are compared with the letters from each
655							language. This method is not meant to be used alone, but to help
656							other methods to differentiate between dialects.
657
658							=cut
659
660							sub _langof_by_used_letters {
661	58			58		259	my ($method, $text) = @_;
662
663	58					515	my $letters = ngram_counts( { spaces => 0 }, $text, 1);
664
665	58					104078	my ($total, %result);
666	58					207	for my $language (get_active_languages()) {
667	1566					2777	$result{$language} = 1;
668	1566					12228	for my $l (keys %$letters) {
669	44469	100				120159	if (not exists($languages{letters}{$language}{$l})) {
670	10144					21640	$result{$language} -= .1;
671							}
672							}
673	1566	100				7144	$result{$language} = 0 if $result{$language} < 0;
674	1566					2517	$total += $result{$language};
675							}
676	1566	50				3782	my @result = (
677	5069					6143	map { ( $_, ($total ? $result{$_} / $total : 0)) }
678	58					609	sort { $result{$b} <=> $result{$a} } keys %result
679							);
680
681	58	50				1678	return wantarray ? @result : $result[0];
682							}
683
684
685							=head2 Prefix Analysis - B, B, B, B
686
687							This method analyses text for the common prefixes of each active language.
688
689							The methods are, respectively, for prefixes of size 1, 2, 3 and 4.
690
691							=cut
692
693							sub _langof_by_prefix_method {
694	7			7		7103	use Text::Affixes;
	7					26870
	7					1089
695
696	58			58		389	(my $method = shift) =~ /^prefixes(\d)$/;
697	58					223	my $text = shift;
698
699	58					607	my $prefixes = get_prefixes( {min => $1, max => $1}, $text);
700
701	58					78680	return _langof_by_method($method, $$prefixes{$1}, $text);
702							}
703
704							=head2 Suffix Analysis - B, B, B, B
705
706							Similar to the Prefix Analysis (see above), but instead analysing common
707							suffixes.
708
709							The methods are, respectively, for suffixes of size 1, 2, 3 and 4.
710
711							=cut
712
713							sub _langof_by_suffix_method {
714	7			7		47	use Text::Affixes;
	7					14
	7					886
715
716	61			61		389	(my $method = shift) =~ /^suffixes(\d)$/;
717	61					293	my $text = shift;
718
719	61					610	my $suffixes = get_suffixes({min => $1, max => $1}, $text);
720
721	61					78743	return _langof_by_method($method, $$suffixes{$1}, $text);
722							}
723
724							###
725
726							# Have you seen my brother? He's a two line long comment. I think he
727							# might be lost... :-\ Me and my father have been looking for him for
728							# some time now :-/
729
730							###
731
732							=head2 Ngram Categorization - B, B, B, B
733
734							Ngrams are sequences of tokens. You can think of them as syllables, but they
735							are also more than that, as they are not only comprised by characters, but also
736							by spaces (delimiting or separating words).
737
738							Ngrams are a very good way for identifying languages, given that the most
739							common ones of each language are not generally very common in others.
740
741							This is usually the best method for small amounts of text or too many active
742							languages.
743
744							The methods are, respectively, for ngrams of size 1, 2, 3 and 4.
745
746							=cut
747
748							sub _langof_by_ngram_method {
749	7			7		5687	use Text::Ngram qw(ngram_counts);
	7					22564
	7					11309
750
751	58			58		339	(my $method = shift) =~ /^ngrams([1-4])$/;
752	58					182	my $text = shift;
753
754	58					445	my $ngrams = ngram_counts( {spaces => 0}, $text, $1);
755
756	58	50				219808	$ngrams = 'letters' if $ngrams eq "ngrams1";
757	58					246	return _langof_by_method($method, $ngrams, $text);
758							}
759
760							=head1 LANGUAGE MANIPULATION
761
762							When trying to perform language identification, C works not with
763							all available languages, but instead with the ones that are active.
764
765							By default, all available languages are active, but that can be changed by the
766							user.
767
768							For your convenience, several methods regarding language manipulation were
769							created. In order to use them, load the module with the tag
770							:language_manipulation.
771
772							These methods work with the two letters code for languages.
773
774							=over 6
775
776							=item B
777
778							Activate a language
779
780							activate_language('en');
781
782							# or
783
784							activate_language($_) for get_all_languages();
785
786							=cut
787
788							sub activate_language {
789	1	50		1	1	11	unless (grep { $_ eq $_[0] } @active_languages) {
	0					0
790	1					4	push @active_languages, $_[0];
791							}
792	1					5	return @active_languages;
793							}
794
795							=item B
796
797							Activates all languages
798
799							activate_all_languages();
800
801							=cut
802
803							sub activate_all_languages {
804	2			2	1	6	@active_languages = get_all_languages();
805	2					16	return @active_languages;
806							}
807
808							=item B
809
810							Deactivates a language
811
812							deactivate_language('en');
813
814							=cut
815
816							sub deactivate_language {
817	1			1	1	5	@active_languages = grep { ! ($_ eq $_[0]) } @active_languages;
	27					40
818	1					13	return @active_languages;
819							}
820
821							=item B
822
823							Deactivates all languages
824
825							deactivate_all_languages();
826
827							=cut
828
829							sub deactivate_all_languages {
830	3			3	1	15	@active_languages = ();
831	3					13	return @active_languages;
832							}
833
834							=item B
835
836							Returns the names of all available languages
837
838							my @all_languages = get_all_languages();
839
840							=cut
841
842							sub get_all_languages {
843	55			55	1	1067	return @all_languages;
844							}
845
846							=item B
847
848							Returns the names of all active languages
849
850							my @active_languages = get_active_languages();
851
852							=cut
853
854							sub get_active_languages {
855	367			367	1	2873	return @active_languages;
856							}
857
858							=item B
859
860							Returns the names of all inactive languages
861
862							my @active_languages = get_inactive_languages();
863
864							=cut
865
866							sub get_inactive_languages {
867	2			2	1	739	return grep { ! is_active($_) } get_all_languages();
	54					67
868							}
869
870							=item B
871
872							Returns the name of the language if it is active, an empty list otherwise
873
874							if (is_active('en')) {
875							# YOUR CODE HERE
876							}
877
878							=cut
879
880							sub is_active {
881	56			56	1	73	return grep { $_ eq $_[0] } get_active_languages();
	729					854
882							}
883
884							=item B
885
886							Returns the name of the language if it exists, an empty list otherwise
887
888							if (is_valid_language('en')) {
889							# YOUR CODE HERE
890							}
891
892							=cut
893
894							sub is_valid_language {
895	37			37	1	13243	return grep { $_ eq $_[0] } get_all_languages();
	999					1359
896							}
897
898							=item B
899
900							Sets the active languages
901
902							set_active_languages('en', 'pt');
903
904							# or
905
906							set_active_languages(get_all_languages());
907
908							=cut
909
910							sub set_active_languages {
911	3			3	1	11	@active_languages = grep { is_valid_language($_) } @_;
	6					18
912	3					25	return @active_languages;
913							}
914
915							=item B
916
917							Given the two letter tag of a language, returns its name
918
919							my $language_name = name_of('pt');
920
921							=cut
922
923							sub name_of {
924	1		50	1	1	4	my $tag = shift \|\| return undef;
925	1					7	return $languages{_names}{$tag};
926							}
927
928							=back
929
930							=cut
931
932							1;
933							__END__