File Coverage

blib/lib/Lingua/Translate.pm
Criterion Covered Total %
statement 85 94 90.4
branch 27 44 61.3
condition 12 29 41.3
subroutine 15 15 100.0
pod 4 4 100.0
total 143 186 76.8


line stmt bran cond sub pod time code
1             #!/usr/bin/perl -w
2              
3             package Lingua::Translate;
4              
5 3     3   29360 use strict;
  3         7  
  3         78  
6 2     2   11 use Carp;
  2         4  
  2         294  
7              
8 2     2   17 use vars qw($VERSION $defaults);
  2         5  
  2         318  
9             $VERSION = '0.09';
10              
11             =head1 NAME
12              
13             Lingua::Translate - Translate text from one language to another
14              
15             =head1 SYNOPSIS
16              
17             use Lingua::Translate;
18              
19             my $xl8r = Lingua::Translate->new(src => "en",
20             dest => "de")
21             or die "No translation server available for en -> de";
22              
23             my $english = "I would like some cigarettes and a box of matches";
24              
25             my $german = $xl8r->translate($english); # dies or croaks on error
26              
27             # prints "Mein Luftkissenfahrzeug ist voll von den Aalen";
28             print $german;
29              
30             =head1 DESCRIPTION
31              
32             Locale::Translate translates text from one written language to
33             another. Currently this is implemented by contacting Babelfish
34             (http://babelfish.altavista.com/), so see there for the language pairs
35             that are supported. Babelfish uses SysTran (http://www.systran.org/)
36             to perform the translation, and contacting a SysTran translation
37             server directly is also supported (in case your translation needs grow
38             beyond babelfish' capacity).
39              
40             =head1 OVERVIEW
41              
42             To translate text, you first have to obtain a translation "handle" for
43             the language pair (source language, destination language) that you are
44             translating, using a constructor (see CONSTRUCTORS, below). This is
45             returned as a perl object. You can then use this handle to translate
46             arbitrary text, using the "translate" method (see METHODS, below).
47              
48             Depending on the back-end that you are using, either the constructor
49             or the translation will open a connection to a translation server. If
50             there are any network errors or timeouts, then an exception will be
51             thrown. If you want to check for this type of error, you will need to
52             wrap both the constructor and the translation function in an eval { }
53             block.
54              
55             If you are using a systrans server, you will need to use the "config"
56             function to tell this module where your translation server is running,
57             and the port that it is listening on.
58              
59             Translating is generally a heavily expensive task; you should try to
60             save the results you get back from this module somewhere so that you
61             do not overload Babelfish.
62              
63             =head1 CONSTRUCTORS
64              
65             =cut
66              
67             # I'm not sure whether the "src", "dest" options should be hard coded
68             # like this. Perhaps they should just be treated as configuration
69             # options. But I think they deserve special treatment.
70              
71             =head2 new(src => $lang, dest => $lang)
72              
73             This function creates a new translation handle and returns it. It
74             takes the following construction options, passed as Option => "value"
75             pairs:
76              
77             =over
78              
79             =item src
80              
81             The source language, in RFC3066 form. See L. There
82             is no default.
83              
84             =item dest
85              
86             The destination language in the same form. There is no default.
87              
88             =back
89              
90             Additionally, any configuration option that is normally passed to the
91             "config" function (see below) may be passed to the "new" constructor
92             as well.
93              
94             =cut
95              
96 2     2   1872 use I18N::LangTags qw(is_language_tag);
  2         6166  
  2         192  
97              
98             # HACK to do without Unicode::MapUTF8, as it can be very hard to
99             # install, particularly on older Perls
100 2     2   13 use vars qw($have_map_utf8);
  2         3  
  2         106  
101             BEGIN {
102 2     2   130 eval "use Unicode::MapUTF8 qw(from_utf8 to_utf8 "
  2     2   1707  
  2         278710  
  2         227  
103             ."utf8_supported_charset);";
104 2 50       2405 $have_map_utf8 = 1 unless $@;
105             }
106              
107             sub new {
108 2     2 1 722 my $class = shift;
109 2         18 my %options = @_;
110              
111 2         9 my $self = bless { }, $class;
112              
113 2 50 33     24 croak "Must supply source and destination language"
114             unless (defined $options{src} and defined $options{dest});
115              
116 2 50       26 is_language_tag($self->{src} = delete $options{src})
117             or croak "$self->{src} is not a valid RFC3066 language tag";
118              
119 2 50       35 is_language_tag($self->{dest} = delete $options{dest})
120             or croak "$self->{dest} is not a valid RFC3066 language tag";
121              
122             # allow custom back end
123 2   33     38 $self->{back_end} = load_back_end(delete $options{back_end}
124             || $defaults->{back_end});
125              
126             # allow encoding
127 2         6 for my $option ( qw(src_enc dest_enc) ) {
128 4 50       17 $self->config($option => delete $options{$option})
129             if (exists $options{$option});
130             }
131              
132 2 50       14 $self->{back_end_options} = \%options if (keys %options);
133              
134 2         9 bless $self, $class;
135             }
136              
137             # creates the $self->{worker} attribute
138              
139             sub _create_worker {
140              
141 2     2   5 my $self = shift;
142              
143             # For flexibility, we allow two methods of configuration; if
144             # the module defines &save_config(), then we call that
145             # function, then call config(%options), new(), then
146             # &restore_config().
147 2 50 33     12 if ( $self->{back_end_options} and
148             my $code_ref = $self->{back_end}->can("save_config") ) {
149 0         0 my $saved_config = $code_ref->();
150 0         0 $self->{back_end}->config(%{$self->{back_end_options}});
  0         0  
151 0         0 $self->{worker} =
152             $self->{back_end}->new( src => $self->{src},
153             dest => $self->{dest} );
154 0         0 $self->{back_end}->restore_config($saved_config);
155             } else {
156             # If they don't define that function, then we just call
157             # their &new() function with the remaining options as a
158             # parameter
159 2 50       23 $self->{worker} =
160             $self->{back_end}->new( src => $self->{src},
161             dest => $self->{dest},
162 2         9 %{$self->{back_end_options}||{}}
163             );
164             }
165              
166             }
167              
168             =head1 METHODS
169              
170             =head2 translate($text) : $text
171              
172             Translates $text and returns the translated text. die on any error.
173              
174             =cut
175              
176             sub translate {
177 2     2 1 1042 my ($self, $text) = map { shift } (1..2);
  4         22  
178              
179 2   33     100 my $source_encoding = $self->{src_enc} || $defaults->{src_enc};
180              
181             # BACK-ENDS MUST DEAL IN utf8
182 2 100 66     17 if ( $have_map_utf8 and lc($source_encoding) ne "utf8" ) {
183 1         8 $text = to_utf8( -string => $text,
184             -charset => $source_encoding);
185             }
186              
187             # create a new worker if we need to
188 2 50       5996 $self->_create_worker unless $self->{worker};
189              
190 2         4 my $translated;
191 2         8 for (1..3) {
192 4         8 eval { $translated = $self->{worker}->translate($text) };
  4         23  
193 4 100       344 last unless $@;
194             }
195              
196 2 100       10 if ( $translated ) {
197 1   33     10 my $dest_encoding = $self->{dest_enc} || $defaults->{dest_enc};
198 1 50 33     10 if ( $have_map_utf8 and lc($dest_encoding) ne "utf8" ) {
199 1         8 $translated = from_utf8( -string => $translated,
200             -charset => $dest_encoding);
201             }
202 1         538 return $translated;
203             } else {
204 1         395 die "Translation back-end failed; $@";
205             }
206             }
207              
208             =head1 CONFIGURATION FUNCTIONS
209              
210             This collection of functions configures general operation of the
211             Lingua::Translate module, which is stored in package scoped variables.
212              
213             These options only affect the construction of new objects, not the
214             operation of existing objects.
215              
216             =head2 load_back_end($backend)
217              
218             This function loads the specified back-end. Used internally by
219             config(). Returns the package name of the backend.
220              
221             =cut
222              
223             sub load_back_end {
224 5     5 1 13 my ($back_end) = (@_);
225              
226 5 100       27 if ( $back_end !~ m/::/ ) {
227 3         10 $back_end = "Lingua::Translate::$back_end";
228             }
229 2     2   1466 eval "use $back_end;";
  2     2   5  
  2         46  
  2         765  
  2         7  
  2         39  
  5         421  
230 5 50       21 if ( $@ ) {
231 0         0 croak "Back end $back_end not available; $@";
232             }
233              
234 5         28 return $back_end;
235             }
236              
237             =head2 config(option => $value)
238              
239             This function sets defaults for use when constructing new objects; it
240             does not affect already constructed objects.
241              
242             =cut
243              
244             sub config {
245              
246 7     7 1 978 my ($self, $target);
247 7 50       64 if ( UNIVERSAL::isa($_[0], __PACKAGE__) ) {
248 0         0 $self = shift;
249             } else {
250 7   100     47 $self = $defaults ||= {};
251             }
252              
253 7         45 while ( my ($option, $value) = splice @_, 0, 2 ) {
254              
255 9 100       37 if ( $option eq "back_end" ) {
    100          
    50          
    0          
256              
257             # the user is selecting a back end. Load it
258 3         16 $self->{back_end} = load_back_end($value);
259              
260             # if there is a worker, kill it - it will be re-created on
261             # the next translate() call.
262 3         39 delete $self->{worker};
263              
264             } elsif ( $option eq "src_enc" ) {
265             # set the source encoding
266              
267 3 50 33     26 croak "Charset `$value' not supported by Unicode::MapUTF8"
268             if ($have_map_utf8 and
269             not utf8_supported_charset($value));
270              
271 3         56 $self->{src_enc} = $value;
272              
273             } elsif ( $option eq "dest_enc" ) {
274              
275 3 50 33     23 croak "Charset `$value' not supported by Unicode::MapUTF8"
276             if ($have_map_utf8 and
277             not utf8_supported_charset($value));
278              
279 3         46 $self->{dest_enc} = $value;
280              
281             } elsif (
282             my $code_ref = UNIVERSAL::can ($self->{back_end},
283             "config")
284             ) {
285              
286             # call the back-end's configuration function
287 0           $code_ref->($option => $value);
288              
289             } else {
290 0           croak "Unknown configuration option $option";
291             }
292             }
293             }
294              
295             # extract the default values from the POD
296             use Pod::Constants
297             'CONFIGURATION FUNCTIONS' => sub {
298             Pod::Constants::add_hook
299             ('*item' => sub {
300 6         2892 my ($varname) = m/(\w+)/;
301 6         33 my ($default) = m/The default value is "(.*)"\./;
302 6         18 config($varname => $default);
303             }
304 2         6038 );
305             Pod::Constants::add_hook
306             (
307             '*back' => sub {
308 2         221 Pod::Constants::delete_hook('*item');
309 2         38 Pod::Constants::delete_hook('*back');
310             }
311 2         64 );
312 2     2   2015 };
  2         42739  
  2         32  
313              
314             =over
315              
316             =item back_end
317              
318             This specifies the method to use for translation. Currently supported
319             values are "Babelfish" and "SysTran". The case is significant.
320              
321             The default value is "Babelfish".
322              
323             Setting this option will "use" the appropriate back-end module from
324             Lingua::Translate::*, which should be a derived class of
325             Lingua::Translate.
326              
327             If the configuration option requested is not found, and a back-end is
328             configured, then that back-end's config function is called with the
329             options.
330              
331             =item src_enc
332              
333             Character set encoding assumed for input text. If the
334             C module is not available, this option has no effect
335             and strings are passed on to the translation back end without
336             processing.
337              
338             The default value is "utf8".
339              
340             =item dest_enc
341              
342             Character set encoding assumed for returned text. If the
343             C module is not available, this option has no effect
344             and strings are passed back from the translation back end as is.
345              
346             The default value is "utf8".
347              
348             =back
349              
350             This function can also be called as an instance method (ie
351             $object->config(name => value), in which case it configures that
352             object only.
353              
354             =head1 BUGS/TODO
355              
356             No mechanism for backends registering which language pairs they have
357             along with a priority, so that the most efficient back-end for a
358             translation can be selected automatically.
359              
360             Some much shorter invocation rules, suitable for one liners, etc.
361              
362             I don't have access to a non-European character set version of
363             SysTran, so translation to/from non-ISO-8859-1 character sets is not
364             currently possible.
365              
366             Need to formalise and define the "Interface" that the back-end modules
367             adhere to.
368              
369             =head1 SEE ALSO
370              
371             L, L,
372             L
373              
374             The original interface to the fish - L, by Daniel
375             J. Urist
376              
377             =head1 AUTHOR
378              
379             Sam Vilain,
380              
381             =cut
382              
383             4;