File Coverage

blib/lib/Lingua/Preferred.pm
Criterion Covered Total %
statement 70 78 89.7
branch 35 42 83.3
condition n/a
subroutine 9 9 100.0
pod 0 2 0.0
total 114 131 87.0


line stmt bran cond sub pod time code
1             package Lingua::Preferred;
2              
3 1     1   1137 use strict;
  1         3  
  1         43  
4 1     1   9 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
  1         2  
  1         227  
5              
6             require Exporter;
7             require AutoLoader;
8              
9             # Use Log::TraceMessages if installed.
10             BEGIN {
11 1     1   2 eval { require Log::TraceMessages };
  1         602  
12 1 50       7 if ($@) {
13 1     277   7 *t = sub {};
  277         311  
14 1     150   1197 *d = sub { '' };
  150         390  
15             }
16             else {
17 0         0 *t = \&Log::TraceMessages::t;
18 0         0 *d = \&Log::TraceMessages::d;
19 0         0 Log::TraceMessages::check_argv();
20             }
21             }
22              
23             @ISA = qw(Exporter AutoLoader);
24             @EXPORT = qw(); @EXPORT_OK = qw(which_lang acceptable_lang);
25             $VERSION = '0.2.4';
26              
27             =pod
28              
29             =head1 NAME
30              
31             Lingua::Preferred - Perl extension to choose a language
32              
33             =head1 SYNOPSIS
34              
35             use Lingua::Preferred qw(which_lang acceptable_lang);
36             my @wanted = qw(en de fr it de_CH);
37             my @available = qw(fr it de);
38              
39             my $which = which_lang(\@wanted, \@available);
40             print "language $which is the best of those available\n";
41              
42             foreach (qw(en_US fr nl de_DE)) {
43             print "language $_ is acceptable\n"
44             if acceptable_lang(\@wanted, $_);
45             }
46              
47             =head1 DESCRIPTION
48              
49             Often human-readable information is available in more than one
50             language. Which should you use? This module provides a way for the
51             user to specify possible languages in order of preference, and then to
52             pick the best language of those available. Different 'dialects' given
53             by the 'territory' part of the language specifier (such as en, en_GB,
54             and en_US) are also supported.
55              
56             The routine C<which_lang()> picks the best language from a list of
57             alternatives. The arguments are:
58              
59             =over
60              
61             =item
62              
63             a reference to a list of preferred languages (first is best). Here, a
64             language is a string like C<'en'> or C<'fr_CA'>. (C<'fr_*'> can also
65             be given - see below.) C<'C'> (named for the Unix 'C' locale) matches
66             any language.
67              
68             =item
69              
70             a reference to non-empty list of available languages. Here, a
71             language can be like C<'en'>, C<'en_CA'>, or C<undef> meaning 'unknown'.
72              
73             =back
74              
75             The return code is which language to use. This will always be an
76             element of the available languages list.
77              
78             The cleverness of this module (if you can call it that) comes from
79             inferring implicit language preferences based on the explicit list
80             passed in. For example, if you say that en is acceptable, then en_IE
81             and en_DK will presumably be acceptable too (but not as good as just
82             plain en). If you give your language as en_US, then en is almost as
83             good, with the other dialects of en following soon afterwards.
84              
85             If there is a tie between two choices, as when two dialects of the
86             same language are available and neither is explicitly preferred, or
87             when none of the available languages appears in the userE<39>s list,
88             then the choice appearing earlier in the available list is preferred.
89              
90             Sometimes, the automatic inferring of related dialects is not what you
91             want, because a language dialect may be very different to the 'main'
92             language, for example Swiss German or some forms of English. For this
93             case, the special form 'XX_*' is available. If you dislike Mexican
94             Spanish (as a completely arbitrary example), then C<[ 'es', 'es_*',
95             'es_MX' ]> would rank this dialect below any other dialect of es (but
96             still acceptable). You donE<39>t have to explicitly list every other
97             dialect of Spanish before es_MX.
98              
99             So for example, supposing C<@avail> contains the languages available:
100              
101             =over
102              
103             =item
104              
105             You know English and prefer US English:
106              
107             $which = which_lang([ 'en_US' ], \@avail);
108              
109             =item
110              
111             You know English and German, German/Germany is preferred:
112              
113             $which = which_lang([ 'en', 'de_DE' ], \@avail);
114              
115             =item
116              
117             You know English and German, but preferably not Swiss German:
118              
119             $which = which_lang([ 'en', 'de', 'de_*', 'de_CH' ], \@avail);
120              
121             Here any dialect of German (eg de_DE, de_AT) is preferable to de_CH.
122              
123             =cut
124             sub which_lang( $$ ) {
125 39 50   39 0 5148 die 'usage: which_lang(listref of preferred langs, listref of available)'
126             if @_ != 2;
127 39         54 my ($pref, $avail) = @_;
128 39         76 t '$pref=' . d $pref;
129 39         64 t '$avail=' . d $avail;
130              
131 39         41 my (%explicit, %implicit);
132 39         43 my $pos = 0;
133              
134             # This seems like the best way to make block-nested subroutines
135             my $add_explicit = sub {
136 63     63   76 my $l = shift;
137 63 50       128 die "preferred language $l listed twice"
138             if defined $explicit{$l};
139 63 100       108 if (delete $implicit{$l}) { t "moved implicit $l to explicit" }
  6         16  
140 57         133 else { t "adding explicit $l" }
141 63         142 $explicit{$l} = $pos++;
142 39         171 };
143             my $add_implicit = sub {
144 64     64   105 my $l = shift;
145 64 100       120 if (defined $explicit{$l}) {
146 14         34 t "$l already explict, not adding implicitly";
147             }
148             else {
149 50 100       4833 if (defined $implicit{$l}) { t "replacing implicit $l" }
  4         9  
150 46         103 else { t "adding implicit $l" }
151 50         155 $implicit{$l} = $pos++
152             }
153 39         115 };
154              
155 39         75 foreach (@$pref) {
156 63         107 $add_explicit->($_);
157              
158 63 100       284 if ($_ eq 'C') {
    100          
    100          
    50          
159             # Doesn't imply anything - C already matches every
160             # possible language.
161             #
162             }
163             elsif (/^[a-z][a-z]$/) {
164             # 'en' implies any dialect of 'en' also
165 30         70 $add_implicit->($_ . '_*');
166             }
167             elsif (/^([a-z][a-z])_([A-Z][A-Z])(?:\@.*)?$/) { # ignore @whatever
168             # 'en_GB' implies 'en', and secondly any other dialect
169 17         31 $add_implicit->($1);
170 17         46 $add_implicit->($1 . '_*');
171             }
172             elsif (/^([a-z][a-z])_\*$/) {
173             # 'en_*' doesn't imply anything - it shouldn't be used
174             # except in odd cases.
175             #
176             }
177 0         0 else { die "bad language '$_'" } # FIXME support 'English' etc
178             }
179              
180 39         187 my %ranking = reverse (%explicit, %implicit);
181 39 50       82 if ($Log::TraceMessages::On) {
182 0         0 t 'ranking:';
183 0         0 foreach (sort { $a <=> $b } keys %ranking) {
  0         0  
184 0         0 t "$_\t$ranking{$_}";
185             }
186             }
187              
188 39         115 my @langs = @ranking{sort { $a <=> $b } keys %ranking};
  93         193  
189 39         5808 my %avail;
190 39         66 foreach (@$avail) {
191 62 100       114 next if not defined;
192 56 50       175 $avail{$_}++ && die "available language $_ listed twice";
193             }
194              
195 39         96 while (defined (my $lang = shift @langs)) {
196 72 100       1259 if ($lang eq 'C') {
    100          
197             # Match first available language.
198 5         52 return $avail->[0];
199             }
200             elsif ($lang =~ /^([a-z][a-z])_\*$/) {
201             # Any dialect of $1 (but not standard). Work through all
202             # of @$avail in order trying to find a match. (So there
203             # is a slight bias towards languages appearing earlier in
204             # @$avail.)
205             #
206 23         50 my $base_lang = $1;
207 23         32 AVAIL: foreach (@$avail) {
208 31 100       72 next if not defined;
209 26 100       233 if (/^\Q$base_lang\E_/) {
210             # Well, it matched... but maybe this dialect was
211             # explicitly specified with a lower priority.
212             #
213 11         15 foreach my $lower_lang (@langs) {
214 9 100       78 next AVAIL if (/^\Q$lower_lang\E$/);
215             }
216            
217 7         94 return $_;
218             }
219             }
220             }
221             else {
222             # Exact match
223 44 100       313 return $lang if $avail{$lang};
224             }
225             }
226              
227             # Couldn't find anything - pick first available language.
228 12         142 return $avail->[0];
229             }
230              
231             =pod
232              
233             Whereas C<which_lang()> picks the best language from a list of
234             alternatives, C<acceptable_lang()> answers whether a single
235             language is included (explicitly or implicitly) in the list of wanted
236             languages. It adds the implicit dialects in the same way.
237              
238             =cut
239             sub acceptable_lang( $$ ) {
240 36 50   36 0 1252 die 'usage: acceptable_lang(listref of wanted langs, lang)'
241             if @_ != 2;
242 36         49 my ($pref, $l) = @_;
243 36         61 t '$pref=' . d $pref;
244 36         87 t '$l=' . d $l;
245              
246             # We just need to ignore the dialects and compare the main part.
247 36         70 my @pref = @$pref; # copy
248 36         88 $l =~ s/_.+//;
249 36         62 foreach (@pref) {
250 49         85 s/_.+//;
251 49 100       141 return 1 if $l eq $_;
252             }
253 14         38 return 0;
254             }
255              
256             =pod
257              
258             =head1 AUTHOR
259              
260             Ed Avis, ed@membled.com
261              
262             =head1 SEE ALSO
263              
264             perl(1).
265              
266             =cut
267              
268             1;
269             __END__