File Coverage

blib/lib/Locale/Util.pm
Criterion Covered Total %
statement 87 215 40.4
branch 22 106 20.7
condition 3 33 9.0
subroutine 10 14 71.4
pod 6 6 100.0
total 128 374 34.2


line stmt bran cond sub pod time code
1             #! /bin/false
2              
3             # vim: set autoindent shiftwidth=4 tabstop=4:
4              
5             # Portable methods for locale handling.
6             # Copyright (C) 2002-2017 Guido Flohr ,
7             # all rights reserved.
8              
9             # This program is free software: you can redistribute it and/or modify
10             # it under the terms of the GNU General Public License as published by
11             # the Free Software Foundation; either version 3 of the License, or
12             # (at your option) any later version.
13              
14             # This program is distributed in the hope that it will be useful,
15             # but WITHOUT ANY WARRANTY; without even the implied warranty of
16             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17             # GNU General Public License for more details.
18              
19             # You should have received a copy of the GNU General Public License
20             # along with this program. If not, see .
21              
22             package Locale::Util;
23              
24 12     12   11468 use strict;
  12         36  
  12         418  
25              
26 12     12   66 use constant DEBUG => 0;
  12         22  
  12         860  
27              
28 12     12   73 use base qw (Exporter);
  12         24  
  12         1243  
29              
30 12     12   81 use vars qw (@EXPORT_OK);
  12         34  
  12         4767  
31              
32             @EXPORT_OK = qw (parse_http_accept_language
33             parse_http_accept_charset
34             set_locale set_locale_cache get_locale_cache
35             web_set_locale);
36              
37             # The following list maps languages to a rough guess of the country that
38             # is most likely to be meant if no locale info for the country alone is
39             # found. I have assembled the list to the best of my knowledge, preferring
40             # the country that has the language as its official language, and in doubt
41             # listing the country that has the most speakers of that language. Corrections
42             # are welcome.
43 12         7669 use constant LANG2COUNTRY => {
44             aa => 'ET', # Afar => Ethiopia
45             ab => 'AB', # Abkhazian => Georgia
46             # ae => '??', # Avestan => ??, Iran?
47             af => 'za', # Afrikaans => South Africa
48             am => 'ET', # Amharic => Ethiopia
49             ar => 'EG', # Arabic => Egypt
50             as => 'IN', # Assamese => India
51             ay => 'BO', # Aymara => Bolivia
52             az => 'AZ', # Azerbaijani => Azerbaijan
53             ba => 'RU', # Bashkir => Russia
54             be => 'BY', # Belarusian => Belarus
55             bg => 'BG', # Bulgarian => Bulgaria
56             bh => 'IN', # Bihari => India
57             bi => 'VU', # Bislama => Vanuatu
58             bn => 'BD', # Bengali => Bangladesh
59             bo => 'CN', # Tibetan => China
60             br => 'FR', # Breton => France
61             bs => 'BA', # Bosnian => Bosnia and Herzegovina
62             ca => 'ES', # Catalan => Spain
63             ce => 'RU', # Chechen => Russia
64             ch => '??', # Chamorro => Guam (or mp?)
65             co => 'FR', # Corsican => France
66             cs => 'CZ', # Czech => Czech Republic
67             cu => 'BG', # Church Slavic => Bulgaria
68             cv => 'RU', # Chuvash => Russia
69             cy => 'GB', # Welsh => United Kingdom
70             da => 'DK', # Danish => Denmark
71             de => 'DE', # German => Germany
72             dz => 'BT', # Dzongkha => Bhutan
73             el => 'GR', # Greek => Greece
74             en => 'US', # English => United States
75             es => 'ES', # Actually Mexico and the US have more Spanish speakers
76             # than Spain. But it can be assumed that they either add
77             # the country to their browser settings or will not care
78             # to much.
79             et => 'EE', # Estonian => Estonia
80             fa => 'IR', # Iran, Islamic Republic of
81             fi => 'FI', # Finnish => Finland
82             fj => 'FJ', # Fijian => Fiji
83             fo => 'FO', # Faeroese => Faroe Islands
84             fr => 'FR', # French => France
85             fy => 'FY', # Frisian => Netherlands
86             ga => 'IE', # Irish => Ireland
87             gd => 'GB', # Gaelic (Scots) => United Kingdom
88             gl => 'ES', # Gallegan => Spain
89             gn => 'PY', # Guarani => Paraguay
90             gu => 'IN', # Gujarati => IN
91             gv => 'GB', # Manx => United Kingdom
92             ha => 'NE', # Hausa => Niger (ng?)
93             he => 'IL', # Hebrew => Israel
94             hi => 'IN', # Hindi => India
95             ho => 'PG', # Hiri Motu => Papua New Guinea
96             hr => 'HR', # Croatian
97             hu => 'HU', # Hungarian => Hungary
98             hy => 'AM', # Armenian => Armenia
99             hz => 'NA', # Herero => Namibia
100             # ia => '??', # Interlingua (aka "latino sine flexione") => ??
101             id => 'ID', # Indonesian => Indonesia
102             # ie => '??', # Interlingue => ???
103             ik => 'US', # Inupiaq => United States
104             is => 'IS', # Icelandic => Iceland
105             it => 'IT', # Italian => Italy
106             iu => 'CA', # Inuktitut => Canada
107             iw => 'IL', # Hebrew => Israel
108             ja => 'JP', # Japanese => Japan
109             jw => 'ID', # Javanese => Indonesia
110             ka => 'GE', # Georgian => Georgia
111             ki => 'KE', # Kikuyu => Kenya
112             kj => 'AO', # Kuanyama => Angola (na?)
113             kk => 'KZ', # Kazakh => Kazakhstan
114             kl => 'GL', # Kalaallisut => Greenland
115             km => 'KH', # Khmer => Cambodia
116             kn => 'IN', # Kannada => India
117             ko => 'KR', # Korean => Korea, Republic of (more speakers than North Korea)
118             ks => 'IN', # Kashmiri => India
119             ku => 'TR', # Kurdish => Turkey
120             kv => 'RU', # Komi => Russia
121             kw => 'GB', # Cornish => United Kingdom
122             ky => 'KG', # Kirghyz => Kyrgyzstan
123             la => 'VA', # Latin => Holy See (Vatican City State)
124             lb => 'LU', # Letzeburgesch => Luxembourg
125             ln => 'CG', # Lingala => Republic of the Congo (cd?)
126             lo => 'LA', # Lao => Lao People's Democratic Republic
127             lt => 'LT', # Lithuanian => Lithuania
128             lv => 'LV', # Latvian => Latvia
129             mg => 'MG', # Malagasy => Madagascar
130             mh => 'MH', # Marshall => Marshall Islands
131             mi => 'NZ', # Maori => New Zealand
132             mk => 'MK', # Macedonian => Macedonia, the Former Yugoslav Republic of
133             ml => 'IN', # Malayalam => India
134             mn => 'MN', # Mongolian => Mongolia
135             mr => 'IN', # Marathi => India
136             ms => 'MY', # Malay => Malaysia (FIXME: not really sure ...)
137             mt => 'MT', # Maltese => Malta
138             my => 'MM', # Burmese => Myanmar
139             na => 'NR', # Nauru => Nauru
140             nb => 'NO', # Norwegian BokmÃ¥l => Norway
141             nd => 'ZA', # Ndebele, North => South Africa
142             ne => 'NP', # Nepali => Nepal
143             ng => 'NA', # Ndonga => Namibia
144             nl => 'NL', # Dutch => Netherlands
145             nn => 'NO', # Norwegian Nynorsk => Norway
146             no => 'NO', # Norwegian => Norway
147             nr => 'ZA', # Ndebele, South => South Africa
148             nv => 'US', # Navajo => United States
149             ny => 'MW', # Chichewa; Nyanja => Malawi
150             oc => 'FR', # Occitan (post 1500) => France
151             om => 'ET', # Oromo => Ethiopia
152             or => 'IN', # Oriya => India
153             os => 'RU', # Ossetian; Ossetic => Russia (FIXME: Or Georgia?)
154             pa => 'IN', # Panjabi => India
155             pi => 'IN', # Pali => India (FIXME: Or Thailand, Sri Lanka, Myanmar,
156             # Cambodia)
157             pl => 'PL', # Polish => Poland
158             ps => 'PK', # Pushto => Pakistan
159             pt => 'PT', # Portuguese => Portugal (following our rules this should
160             # actually be Brazil but that would be to unrealistic,
161             # people from Brazil set their locale to pt_BR).
162             qu => 'PE', # Quechua => Peru
163             rm => 'CH', # Rhaeto-Romance => Switzerland
164             rn => 'RW', # Rundi => Rwanda
165             ro => 'RO', # Romanian => Romania
166             ru => 'RU', # Russian => Russia
167             rw => 'RW', # Kinyarwanda => Rwanda
168             sa => 'IN', # Sanskrit => India
169             sc => 'IT', # Sardinian => Italy
170             sd => 'IN', # Sindhi => India
171             se => 'SE', # Sami => Sweden (Totally unsure here. The Sami languages
172             # are also spoken in Norway, Finland and Russia, but the
173             # largest part of the area seems to be in Sweden.
174             sg => '??', # Sango => Central African Republic
175             si => 'LK', # Sinhalese => Sri Lanka
176             sk => 'SK', # Slovakian => Slovakia
177             sl => 'SI', # Slovenian => Slovenia
178             sm => 'WS', # Samoan => Samoa
179             sh => 'ZW', # Shona => Zimbabwe (FIXME: Rather Mozambique?)
180             so => 'SO', # Somali => Somalia
181             sq => 'AL', # Albanian => Albania
182             sr => 'YU', # Serbian => Yugoslavia
183             ss => '??', # Swati => Swaziland (za?)
184             st => 'LS', # Sotho => Lesotho
185             su => 'IN', # Sundanese => Indonesia
186             sv => 'SE', # Swedish => Sweden
187             sw => 'TZ', # Suaheli => Tanzania, United Republic of
188             ta => 'LK', # Tamil => Sri Lanka
189             te => 'IN', # Telugu => India
190             tg => 'TJ', # Tajik => Tajikistan
191             th => 'TH', # Thai => Thailand
192             ti => 'ER', # Tigrinya => Eritrea
193             tk => 'TM', # Turkmen => Turkmenistan
194             tl => 'PH', # Tagalog => Philippines
195             tn => 'BW', # Tswana => Botswana
196             to => 'TO', # Tonga => Tonga
197             tr => 'TR', # Turkish => Turkish
198             tt => 'RU', # Tatar => Russia
199             tw => 'GH', # Twi => Ghana
200             ug => 'CN', # Uighur => China
201             uk => 'UA', # Ukrainian => Ukraine
202             ur => 'PK', # Urdu => Pakistan
203             uz => 'UZ', # Uzbek => Uzbekistan
204             vi => 'VN', # Vietnamese => Vietnam
205             # vo => '??', # Volapuk => Nowhere
206             wo => 'SN', # Wolof => Senegal
207             xh => 'ZA', # Xhosa => South Africa
208             yi => 'IL', # Yiddish => Israel (FIXME: Rather United States?)
209             yo => 'NG', # Yoruba => Nigeria
210             za => 'CN', # Zhuang => China
211             zh => 'CN', # Chinese => China
212             zu => 'ZA', # Zulu => South Africa
213 12     12   95 };
  12         22  
214              
215 12         7457 use constant WIN32LANGUAGE => {
216             aa => "Afar",
217             ab => "Abkhazian",
218             ae => "Avestan",
219             af => "Afrikaans",
220             am => "Amharic",
221             ar => "Arabic",
222             as => "Assamese",
223             ay => "Aymara",
224             az => "Azerbaijani",
225             ba => "Bashkir",
226             be => "Belarusian",
227             bg => "Bulgarian",
228             bh => "Bihari",
229             bi => "Bislama",
230             bn => "Bengali",
231             bo => "Tibetan",
232             br => "Breton",
233             bs => "Bosnian",
234             ca => "Catalan",
235             ce => "Chechen",
236             ch => "Chamorro",
237             co => "Corsican",
238             cs => "Czech",
239             cu => "Church Slavic",
240             cv => "Chuvash",
241             cy => "Welsh",
242             da => "Danish",
243             de => "German",
244             dz => "Dzongkha",
245             el => "Greek",
246             en => "English",
247             eo => "Esperanto",
248             es => "Spanish",
249             et => "Estonian",
250             eu => "Basque",
251             fa => "Persian",
252             fi => "Finnish",
253             fj => "Fijian",
254             fo => "Faeroese",
255             fr => "French",
256             fy => "Frisian",
257             ga => "Irish",
258             gd => "Gaelic (Scots)",
259             gl => "Gallegan",
260             gn => "Guarani",
261             gu => "Gujarati",
262             gv => "Manx",
263             ha => "Hausa",
264             he => "Hebrew",
265             hi => "Hindi",
266             ho => "Hiri Motu",
267             hr => "Croatian",
268             hu => "Hungarian",
269             hy => "Armenian",
270             hz => "Herero",
271             ia => "Interlingua",
272             id => "Indonesian",
273             ie => "Interlingue",
274             ik => "Inupiaq",
275             is => "Icelandic",
276             it => "Italian",
277             iu => "Inuktitut",
278             ja => "Japanese",
279             jw => "Javanese",
280             ka => "Georgian",
281             ki => "Kikuyu",
282             kj => "Kuanyama",
283             kk => "Kazakh",
284             kl => "Kalaallisut",
285             km => "Khmer",
286             kn => "Kannada",
287             ko => "Korean",
288             ks => "Kashmiri",
289             ku => "Kurdish",
290             kv => "Komi",
291             kw => "Cornish",
292             ky => "Kirghiz",
293             la => "Latin",
294             lb => "Letzeburgesch",
295             ln => "Lingala",
296             lo => "Lao",
297             lt => "Lithuanian",
298             lv => "Latvian",
299             mg => "Malagasy",
300             mh => "Marshall",
301             mi => "Maori",
302             # Sorry, lads, but that is what M$ calls your language ...
303             mk => "FYRO Macedonian",
304             ml => "Malayalam",
305             mn => "Mongolian",
306             mo => "Moldavian",
307             mr => "Marathi",
308             ms => "Malay",
309             mt => "Maltese",
310             my => "Burmese",
311             na => "Nauru",
312             nb => "Norwegian (BokmÃ¥l)",
313             nd => "Ndebele, North",
314             ne => "Nepali",
315             ng => "Ndonga",
316             nl => "Dutch",
317             nn => "Norwegian-Nynorsk",
318             no => "Norwegian-Nynorsk",
319             nr => "Ndebele, South",
320             nv => "Navajo",
321             ny => "Chichewa",
322             oc => "Occitan (post 1500)",
323             om => "Oromo",
324             or => "Oriya",
325             os => "Ossetian",
326             pa => "Panjabi",
327             pi => "Pali",
328             pl => "Polish",
329             ps => "Pushto",
330             pt => "Portuguese",
331             qu => "Quechua",
332             rm => "Rhaeto-Romance",
333             rn => "Rundi",
334             ro => "Romanian",
335             ru => "Russian",
336             rw => "Kinyarwanda",
337             sa => "Sanskrit",
338             sc => "Sardinian",
339             sd => "Sindhi",
340             se => "Sami",
341             sg => "Sango",
342             si => "Sinhalese",
343             sk => "Slovak",
344             sl => "Slovenian",
345             sm => "Samoan",
346             sn => "Shona",
347             so => "Somali",
348             sq => "Albanian",
349             sr => "Serbian",
350             ss => "Swati",
351             st => "Sotho",
352             su => "Sundanese",
353             sv => "Swedish",
354             sw => "Swahili",
355             ta => "Tamil",
356             te => "Telugu",
357             tg => "Tajik",
358             th => "Thai",
359             ti => "Tigrinya",
360             tk => "Turkmen",
361             tl => "Tagalog",
362             tn => "Tswana",
363             to => "Tonga",
364             tr => "Turkish",
365             ts => "Tsonga",
366             tt => "Tatar",
367             tw => "Twi",
368             ug => "Uighur",
369             uk => "Ukrainian",
370             ur => "Urdu",
371             uz => "Uzbek",
372             vi => "Vietnamese",
373             vo => "Volapuk",
374             wo => "Wolof",
375             xh => "Xhosa",
376             yi => "Yiddish",
377             yo => "Yoruba",
378             za => "Zhuang",
379             zh => "Chinese",
380             zu => "Zulu",
381 12     12   93 };
  12         30  
382              
383 12         28450 use constant WIN32COUNTRY => {
384             ad => "Andorra",
385             ae => "United Arab Emirates",
386             af => "Afghanistan",
387             ag => "Antigua and Barbuda",
388             ai => "Anguilla",
389             al => "Albania",
390             am => "Armenia",
391             an => "Netherlands Antilles",
392             ao => "Angola",
393             aq => "Antarctica",
394             ar => "Argentina",
395             as => "American Samoa",
396             at => "Austria",
397             au => "Australia",
398             aw => "Aruba",
399             ax => "Aland Islands",
400             az => "Azerbaijan",
401             ba => "Bosnia and Herzegovina",
402             bb => "Barbados",
403             bd => "Bangladesh",
404             be => "Belgium",
405             bf => "Burkina Faso",
406             bg => "Bulgaria",
407             bh => "Bahrain",
408             bi => "Burundi",
409             bj => "Benin",
410             bm => "Bermuda",
411             bn => "Brunei Darussalam",
412             bo => "Bolivia",
413             br => "Brazil",
414             bs => "Bahamas",
415             bt => "Bhutan",
416             bv => "Bouvet Island",
417             bw => "Botswana",
418             by => "Belarus",
419             bz => "Belize",
420             ca => "Canada",
421             cc => "Cocos (Keeling) Islands",
422             cd => "Congo, The Democratic Republic of the",
423             cf => "Central African Republic",
424             cg => "Congo",
425             ch => "Switzerland",
426             ci => "Cote D'Ivoire",
427             ck => "Cook Islands",
428             cl => "Chile",
429             cm => "Cameroon",
430             cn => "China",
431             co => "Colombia",
432             cr => "Costa Rica",
433             cs => "Serbia and Montenegro",
434             cu => "Cuba",
435             cv => "Cape Verde",
436             cx => "Christmas Island",
437             cy => "Cyprus",
438             cz => "Czech Republic",
439             de => "Germany",
440             dj => "Djibouti",
441             dk => "Denmark",
442             dm => "Dominica",
443             do => "Dominican Republic",
444             dz => "Algeria",
445             ec => "Ecuador",
446             ee => "Estonia",
447             eg => "Egypt",
448             eh => "Western Sahara",
449             er => "Eritrea",
450             es => "Spain",
451             et => "Ethiopia",
452             fi => "Finland",
453             fj => "Fiji",
454             fk => "Falkland Islands (Malvinas)",
455             fm => "Micronesia, Federated States of",
456             fo => "Faroe Islands",
457             fr => "France",
458             fx => "France, Metropolitan",
459             ga => "Gabon",
460             gb => "United Kingdom",
461             gd => "Grenada",
462             ge => "Georgia",
463             gf => "French Guiana",
464             gh => "Ghana",
465             gi => "Gibraltar",
466             gl => "Greenland",
467             gm => "Gambia",
468             gn => "Guinea",
469             gp => "Guadeloupe",
470             gq => "Equatorial Guinea",
471             gr => "Greece",
472             gs => "South Georgia and the South Sandwich Islands",
473             gt => "Guatemala",
474             gu => "Guam",
475             gw => "Guinea-Bissau",
476             gy => "Guyana",
477             hk => "Hong Kong",
478             hm => "Heard Island and McDonald Islands",
479             hn => "Honduras",
480             hr => "Croatia",
481             ht => "Haiti",
482             hu => "Hungary",
483             id => "Indonesia",
484             ie => "Ireland",
485             il => "Israel",
486             in => "India",
487             io => "British Indian Ocean Territory",
488             iq => "Iraq",
489             ir => "Iran",
490             is => "Iceland",
491             it => "Italy",
492             jm => "Jamaica",
493             jo => "Jordan",
494             jp => "Japan",
495             ke => "Kenya",
496             kg => "Kyrgyzstan",
497             kh => "Cambodia",
498             ki => "Kiribati",
499             km => "Comoros",
500             kn => "Saint Kitts and Nevis",
501             kp => "North-Korea",
502             kr => "Korea",
503             kw => "Kuwait",
504             ky => "Cayman Islands",
505             kz => "Kazakhstan",
506             la => "Laos",
507             lb => "Lebanon",
508             lc => "Saint Lucia",
509             li => "Liechtenstein",
510             lk => "Sri Lanka",
511             lr => "Liberia",
512             ls => "Lesotho",
513             lt => "Lithuania",
514             lu => "Luxembourg",
515             lv => "Latvia",
516             ly => "Libyan",
517             ma => "Morocco",
518             mc => "Monaco",
519             md => "Moldova",
520             mg => "Madagascar",
521             mh => "Marshall Islands",
522             mk => "Former Yugoslav Republic of Macedonia",
523             ml => "Mali",
524             mm => "Myanmar",
525             mn => "Mongolia",
526             mo => "Macao",
527             mp => "Northern Mariana Islands",
528             mq => "Martinique",
529             mr => "Mauritania",
530             ms => "Montserrat",
531             mt => "Malta",
532             mu => "Mauritius",
533             mv => "Maldives",
534             mw => "Malawi",
535             mx => "Mexico",
536             my => "Malaysia",
537             mz => "Mozambique",
538             na => "Namibia",
539             nc => "New Caledonia",
540             ne => "Niger",
541             nf => "Norfolk Island",
542             ng => "Nigeria",
543             ni => "Nicaragua",
544             nl => "Netherlands",
545             no => "Norway",
546             np => "Nepal",
547             nr => "Nauru",
548             nu => "Niue",
549             nz => "New Zealand",
550             om => "Oman",
551             pa => "Panama",
552             pe => "Peru",
553             pf => "French Polynesia",
554             pg => "Papua New Guinea",
555             ph => "Philippines",
556             pk => "Pakistan",
557             pl => "Poland",
558             pm => "Saint Pierre and Miquelon",
559             pn => "Pitcairn",
560             pr => "Puerto Rico",
561             ps => "Palestinian Territory, Occupied",
562             pt => "Portugal",
563             pw => "Palau",
564             py => "Paraguay",
565             qa => "Qatar",
566             re => "Reunion",
567             ro => "Romania",
568             ru => "Russian Federation",
569             rw => "Rwanda",
570             sa => "Saudi Arabia",
571             sb => "Solomon Islands",
572             sc => "Seychelles",
573             sd => "Sudan",
574             se => "Sweden",
575             sg => "Singapore",
576             sh => "Saint Helena",
577             si => "Slovenia",
578             sj => "Svalbard and Jan Mayen",
579             sk => "Slovakia",
580             sl => "Sierra Leone",
581             sm => "San Marino",
582             sn => "Senegal",
583             so => "Somalia",
584             sr => "Suriname",
585             st => "Sao Tome and Principe",
586             sv => "El Salvador",
587             sy => "Syrian Arab Republic",
588             sz => "Swaziland",
589             tc => "Turks and Caicos Islands",
590             td => "Chad",
591             tf => "French Southern Territories",
592             tg => "Togo",
593             th => "Thailand",
594             tj => "Tajikistan",
595             tk => "Tokelau",
596             tl => "Timor-Leste",
597             tm => "Turkmenistan",
598             tn => "Tunisia",
599             to => "Tonga",
600             tr => "Turkey",
601             tt => "Trinidad and Tobago",
602             tv => "Tuvalu",
603             tw => "Taiwan, Province of China",
604             tz => "Tanzania, United Republic of",
605             ua => "Ukraine",
606             ug => "Uganda",
607             um => "United States Minor Outlying Islands",
608             us => "United States",
609             uy => "Uruguay",
610             uz => "Uzbekistan",
611             va => "Holy See (Vatican City State)",
612             vc => "Saint Vincent and the Grenadines",
613             ve => "Venezuela",
614             vg => "Virgin Islands, British",
615             vi => "Virgin Islands, U.S.",
616             vn => "Vietnam",
617             vu => "Vanuatu",
618             wf => "Wallis and Futuna",
619             ws => "Samoa",
620             ye => "Yemen",
621             yt => "Mayotte",
622             za => "South Africa",
623             zm => "Zambia",
624             zw => "Zimbabwe",
625 12     12   104 };
  12         25  
626              
627             my $locale_cache;
628              
629             sub parse_http_accept_language {
630 3     3 1 214 my ($string) = @_;
631              
632 3         25 my @tokens = split / *, */, $string;
633            
634 3         6 my %retval;
635 3         8 foreach my $token (@tokens) {
636 11         16 my $quality = 1;
637             # This RE is more forgiving than the standard. It accepts
638             # values greater than 1.0 and with more fractional digits
639             # than 3.
640 11 100       47 if ($token =~ s/ *; *q *= *([0-9]+(?:\.([0-9]+))?)$//) {
641 7         16 $quality = $1;
642             }
643 11         27 $retval{$token} = $quality;
644             }
645              
646             # RFC 2616 only allows 1-8 characters for language and country
647             # but we are more forgiving.
648             return grep {
649 11         54 /^[A-Za-z]+(?:-[A-Za-z]+)?$/
650             } map {
651 11 100       24 $_ = 'C' if $_ eq '*'; $_
  11         17  
652             } sort {
653 3         18 $retval{$b} <=> $retval{$a}
  12         33  
654             } keys %retval;
655             }
656              
657             sub parse_http_accept_charset {
658 1     1 1 78 my ($string) = @_;
659              
660 1         9 my @tokens = split / *, */, $string;
661            
662 1         3 my %retval;
663 1         3 foreach my $token (@tokens) {
664 3         6 my $quality = 1;
665             # This RE is more forgiving than the standard. It accepts
666             # values greater than 1.0 and with more fractional digits
667             # than 3.
668 3 100       15 if ($token =~ s/ *; *q *= *([0-9]+(?:\.([0-9]+))?)$//) {
669 2         6 $quality = $1;
670             }
671 3         7 $retval{$token} = $quality;
672             }
673              
674             return grep {
675             # This is really allowed in character set names ...
676 3         13 /^[-!\#\$\%\&\'\+\.0-9A-Z_\`a-z\|\~]+$/
677             } map {
678 3 50       7 $_ = undef if $_ eq '*'; $_
  3         6  
679             } sort {
680 1         6 $retval{$b} <=> $retval{$a}
  2         8  
681             } keys %retval;
682             }
683              
684             sub set_locale {
685 11     11 1 82 my ($category, $language, $country, $charset) = @_;
686            
687 11         78 require POSIX;
688              
689 11 50       52 $country = '' unless defined $country;
690 11 50       39 $charset = '' unless defined $charset;
691            
692 11         21 my $set_locale;
693             # Look up the cache first.
694 11 50       53 if (my $retval = $locale_cache->{$language}->{$country}->{$charset}) {
695 0         0 my ($locale, $country) = @$retval;
696 0         0 POSIX::setlocale ($category, $locale);
697 0         0 return @$retval;
698             }
699              
700             # Initialize the cache with the undefined value so that we can do
701             # error returns without setting it.
702 11         32 $locale_cache->{$language}->{$country}->{$charset} = undef;
703              
704 11 50 33     117 my $windows = ($^O !~ /darwin/i && $^O =~ /win/i) ? 1 : 0;
705 11 50       45 if ($windows) {
706 0         0 return &__set_locale_windows;
707             }
708            
709 11         23 my $set_language;
710             my $set_country;
711              
712             # First we try to only use the language.
713 11         30 my @languages = ($language);
714 11         25 my @lc_languages = map { lc $_ } @languages;
  11         52  
715 11         22 my @uc_languages = map { uc $_ } @languages;
  11         32  
716 11         21 my %seen = ();
717              
718 11         26 foreach my $language (@languages, @lc_languages, @uc_languages) {
719 33 100       108 next if $seen{$language}++;
720 22         32 warn "Trying lingua only setlocale '$language'.\n" if DEBUG;
721 22         305 my $result = POSIX::setlocale ($category, $language);
722 22 50       82 if ($result) {
723 0 0       0 $set_locale = $set_language = $result if $result;
724 0         0 last;
725             }
726             }
727              
728             # Now try it with the country appended.
729 11 50       54 my @countries = length $country ? ($country) : ();
730 11         26 my @uc_countries = map { uc $_ } @countries;
  11         36  
731 11         20 my @lc_countries = map { uc $_ } @countries;
  11         26  
732 11         29 push @countries, @uc_countries, @lc_countries;
733            
734 11         24 LINGUA: foreach my $language (@languages, @lc_languages, @uc_languages) {
735 33         47 my $count = 0;
736             my @guessed_countries = (LANG2COUNTRY->{lc $language},
737             lc LANG2COUNTRY->{lc $language},
738 33         121 uc LANG2COUNTRY->{lc $language});
739 33         58 foreach my $c (@countries, @guessed_countries) {
740 198         248 ++$count;
741 198 50 33     555 next unless defined $c && length $c;
742 198         314 my $try = $language . '_' . $c;
743 198 100       486 next if $seen{$try}++;
744 66         87 warn "Trying setlocale '$try'.\n" if DEBUG;
745 66         878 my $result = POSIX::setlocale ($category, $try);
746 66 50       200 if ($result) {
747 0         0 $set_locale = $result;
748 0 0       0 if ($count >= @countries) {
749 0         0 $set_country = $c;
750             } else {
751 0         0 $set_country = $country;
752             }
753              
754 0         0 last LINGUA;
755             }
756             }
757             }
758            
759 11 50       44 unless (length $charset) {
760 11 50 33     143 return unless defined $set_locale && length $set_locale;
761            
762 0           $locale_cache->{$language}->{$country}->{$charset} =
763             [$set_locale, $set_country];
764 0 0         return wantarray ? ($set_locale, $set_country) : $set_locale;
765             }
766            
767 0           my @charsets = ($charset);
768 0           my $cleaned = $charset;
769 0 0         push @charsets, $cleaned if $cleaned =~ s/-//g;
770 0           my @lc_charsets = map { lc $charset } @charsets;
  0            
771 0           my @uc_charsets = map { uc $charset } @charsets;
  0            
772 0           push @charsets, @lc_charsets, @uc_charsets;
773            
774 0           %seen = ();
775 0           LINGUA2: foreach my $language (@languages,
776             @lc_languages, @uc_languages) {
777             my @guessed_countries = (LANG2COUNTRY->{lc $language},
778             lc LANG2COUNTRY->{lc $language},
779 0           uc LANG2COUNTRY->{lc $language});
780 0           my $count = 0;
781 0           foreach my $c (@countries, @guessed_countries) {
782 0           ++$count;
783 0 0 0       $c = '' unless defined $c && length $c;
784 0           my $country_try = $language;
785 0 0         $country_try .= (length $c) ? "_$c" : '';
786            
787 0           foreach my $ch (@charsets, @lc_charsets, @uc_charsets) {
788 0           my $try = $country_try . '.' . $ch;
789 0 0         next if $seen{$try}++;
790 0           warn "Trying setlocale '$try'.\n" if DEBUG;
791            
792 0           my $result = POSIX::setlocale ($category, $try);
793 0 0         if ($result) {
794 0           $set_locale = $result;
795 0 0         if ($count >= @countries) {
796 0           $set_country = $c;
797             } else {
798 0           $set_country = $country;
799             }
800            
801 0           last LINGUA2;
802             }
803             }
804             }
805             }
806              
807 0 0 0       return unless defined $set_locale && length $set_locale;
808              
809 0           $locale_cache->{$language}->{$country}->{$charset} =
810             [$set_locale, $set_country];
811              
812 0 0         return wantarray ? ($set_locale, $set_country) : $set_locale;
813             }
814              
815             sub __set_locale_windows {
816 0     0     my ($category, $language, $country, $charset) = @_;
817              
818 0           my $set_locale;
819              
820 0 0         $country = '' unless defined $country;
821 0 0         $charset = '' unless defined $charset;
822            
823             # First we try to only use the language.
824 0           my $long_language = WIN32LANGUAGE->{lc $language};
825 0           my @languages = ($long_language, $language);
826 0           my %seen = ();
827 0           foreach my $language (@languages) {
828 0 0         next if $seen{$language}++;
829 0           warn "Trying lingua only setlocale '$language'.\n" if DEBUG;
830 0           my $result = POSIX::setlocale ($category, $language);
831 0 0         if ($result) {
832 0 0         $set_locale = $result if $result;
833 0           last;
834             }
835             }
836            
837             # Now try it with the country appended.
838 0           my $set_country;
839 0 0         if (length $country) {
840 0           COMBI: foreach my $language (@languages) {
841             # We do not need a fallback country here, because the "system" already
842             # provides the information.
843 0           my @short_countries = ($country);
844             my @countries = map {
845 0           WIN32COUNTRY->{lc $_}
846 0           } grep { length $_ } @short_countries;
  0            
847 0           foreach my $c (@countries) {
848 0 0 0       next unless defined $c && length $c;
849 0           my $try = $language . '_' . $c;
850 0 0         next if $seen{$try}++;
851 0           warn "Trying setlocale '$try'.\n" if DEBUG;
852 0           my $result = POSIX::setlocale ($category, $try);
853 0 0         if ($result) {
854 0           $set_locale = $result;
855 0           $set_country = $c;
856 0           last COMBI;
857             }
858             }
859             }
860             }
861              
862 0 0 0       return unless defined $set_locale && length $set_locale;
863              
864             # Apparently, there is no point in setting a charset. Even the new
865             # MS-DOS versions like 2000 or XP still have the concept of more or
866             # less fixed codepages. Switching to UTF-8 does not work.
867 0           $locale_cache->{$language}->{$country}->{$charset} =
868             [$set_locale, $set_country];
869 0 0         return wantarray ? ($set_locale, $set_country) : $set_locale;
870             }
871              
872             sub get_locale_cache {
873 0     0 1   $locale_cache;
874             }
875              
876             sub set_locale_cache {
877 0 0 0 0 1   if (ref $_[0] && 'HASH' eq ref $_[0]) {
878 0           $locale_cache = $_[0];
879             } else {
880 0           my %locale_cache = @_;
881 0           $locale_cache = \%locale_cache;
882             }
883             }
884              
885             sub web_set_locale {
886 0     0 1   my ($accept_language, $accept_charset, $category, $available) = @_;
887              
888 0           my %available;
889 0 0         if ($available) {
890 0           foreach (@$available) {
891 0           my $locale = $_;
892 0           $locale =~ s/[_\@\.].*//;
893 0           $available{lc $locale} = 1;
894             }
895             }
896              
897 0           my @languages;
898 0 0 0       if (ref $accept_language && 'ARRAY' eq ref $accept_language) {
899 0           @languages = @$accept_language;
900             } else {
901 0           @languages = parse_http_accept_language $accept_language;
902             }
903              
904 0 0         if ($available) {
905 0           my @all = @languages;
906 0           @languages = ();
907 0           foreach my $locale (@all) {
908 0           my $language = lc $locale;
909 0           $language =~ s/[_\@\.].*//;
910 0 0         push @languages, $locale if $available{$language};
911             }
912             }
913              
914 0           my @charsets;
915 0 0         if (defined $accept_charset) {
916 0 0 0       if (ref $accept_charset && 'ARRAY' eq ref $accept_charset) {
917 0           @charsets = @$accept_charset;
918             } else {
919 0           @charsets = parse_http_accept_charset $accept_charset;
920             }
921             }
922              
923 0 0         unless (defined $category) {
924 0           require POSIX;
925 0           $category = POSIX::LC_ALL();
926             }
927              
928 0           my ($set_locale, $set_language, $set_country, $set_charset);
929 0           foreach my $lang (@languages) {
930 0           my ($language, $country) = split /-/, $lang, 2;
931              
932 0           my ($locale, $country_used) =
933             set_locale ($category, $language, $country, $charsets[0]);
934            
935 0 0         if (defined $locale) {
936             # If a country was specified, we have to check whether it
937             # was actually selected.
938 0 0         if (defined $country) {
939 0 0 0       if (!defined $country
940             || ($country ne $country_used)) {
941 0           $set_language = $language;
942 0           $set_locale = $locale;
943 0           $set_country = $country_used;
944 0           $set_charset = $charsets[0];
945             }
946             }
947              
948 0 0         if (wantarray) {
949 0           return $locale, $lang, $country_used, $charsets[0];
950             } else {
951 0           return $locale;
952             }
953             }
954             }
955            
956 0 0         if (defined $set_locale) {
957 0 0         if (wantarray) {
958 0           return $set_locale, $set_language, $set_country, $set_charset;
959             } else {
960 0           return $set_locale;
961             }
962             }
963              
964 0           return;
965             }
966              
967             1;
968              
969             __END__