File Coverage

blib/lib/TeX/Hyphen/Pattern.pm
Criterion Covered Total %
statement 106 109 100.0
branch 20 22 100.0
condition n/a
subroutine 16 16 100.0
pod 3 3 100.0
total 145 150 100.0


line stmt bran cond sub pod time code
1             # -*- cperl; cperl-indent-level: 4 -*-
2             # Copyright (C) 2009-2021, Roland van Ipenburg
3             package TeX::Hyphen::Pattern v1.1.7;
4 4     4   437596 use Moose;
  4         1762637  
  4         27  
5 4     4   28286 use 5.014000;
  4         13  
6 4     4   26 use utf8;
  4         7  
  4         42  
7              
8 4     4   2989 use English '-no_match_vars';
  4         16078  
  4         22  
9 4     4   4626 use Log::Log4perl qw(:easy get_logger);
  4         181178  
  4         22  
10 4     4   4908 use Set::Scalar ();
  4         41666  
  4         92  
11 4     4   1109 use Encode ();
  4         28417  
  4         132  
12             use Module::Pluggable
13 4         31 'sub_name' => '_available',
14             'search_path' => ['TeX::Hyphen::Pattern'],
15 4     4   1946 'require' => 1;
  4         31231  
16              
17 4     4   3384 use File::Temp ();
  4         64133  
  4         107  
18              
19 4     4   1510 use Readonly ();
  4         10692  
  4         5464  
20             ## no critic (ProhibitCallsToUnexportedSubs)
21             Readonly::Scalar my $EMPTY => q{};
22             Readonly::Scalar my $DASH => q{-};
23             Readonly::Scalar my $UNDERSCORE => q{_};
24             Readonly::Scalar my $TEX_COMMENT_LINE => q{%};
25             Readonly::Scalar my $CARON_ESCAPE => q{"};
26             Readonly::Scalar my $CLASS_BEGIN => q{[};
27             Readonly::Scalar my $CLASS_END => q{]};
28             Readonly::Scalar my $DEFAULT_LABEL => q{en-US};
29             Readonly::Scalar my $UTF8 => q{:utf8};
30             Readonly::Scalar my $PLUGGABLE => q{TeX::Hyphen::Pattern::};
31             Readonly::Scalar my $TEX_PATTERN_START => qq@\\patterns{\n#@;
32             Readonly::Scalar my $TEX_PATTERN_FINISH => qq@\n}@;
33             Readonly::Scalar my $TEX_INPUT_COMMAND => q{\\\input\s+hyph-(.*?)\.tex};
34             Readonly::Scalar my $TEX_MESSAGE => q{\\\message};
35              
36             Readonly::Scalar my $ERR_CANT_WRITE => q{Can't write to file '%s', stopped %s};
37              
38             Readonly::Hash my %FALLBACK => (
39             'De_DE' => q{De_1996_ec},
40             'Af_za' => q{Af_ec},
41             'Da_DK' => q{Da_ec},
42             'Et_ee' => q{Et_ec},
43             'Fr_fr' => q{Fr_ec},
44             'It_it' => q{It},
45             'Lt_lt' => q{Lt},
46             'Nl_nl' => q{Nl},
47             'Pl_pl' => q{Pl},
48             'Pt_br' => q{Pt},
49             'Sh' => q{Sh_latn},
50             'Sl' => q{Sl},
51             );
52              
53             Readonly::Hash my %LOG => (
54             'MATCH_MODULE' => q{Looking for a match for '%s'},
55             'NO_MATCH_CS' => q{No case sensitive pattern match found for '%s'},
56             'NO_MATCH_CI' => q{No case insensitive pattern match found for '%s'},
57             'NO_MATCH_PARTIAL' => q{No partial pattern match found for '%s'},
58             'NO_MATCH' => q{No pattern match found for '%s'},
59             'MATCHES' => q{Pattern match(es) found '%s'},
60             'CACHE_HIT' => q{Cache hit for '%s'},
61             'CACHE_MISS' => q{Cache miss for '%s'},
62             'FILE_UNDEF' => q{Returning undef file for '%s'},
63             'PATCH_OPENOFFICE' => q{Patching OpenOffice.org pattern},
64             'PATCH_TEX_INPUT' => q{Patching TeX pattern with \input},
65             'PATCH_CARONS' => q{Patching "x encoded carons},
66             'PATCH_TEX_MESSAGE' => q{Patching TeX pattern with \message},
67             'DELETING' => q{Deleting %d temporary file(s) %s},
68             'DELETE_FAIL' => q{Could not delete all temporary files},
69             'DELETE_SUCCES' => q{Deleted all temporary files},
70             );
71             Readonly::Hash my %CARON_MAP => ( q{c} => q{č}, q{s} => q{š}, q{z} => q{ž} );
72             ## use critic
73              
74             Log::Log4perl->easy_init($ERROR);
75             my $log = get_logger();
76              
77             ## no critic (ProhibitCallsToUndeclaredSubs)
78             has 'label' => ( 'is' => 'rw', 'isa' => 'Str', 'default' => $DEFAULT_LABEL );
79             has '_cache' => ( 'is' => 'rw', 'isa' => 'HashRef', 'default' => sub { {} } );
80             has '_plugs' => ( 'is' => 'rw', 'isa' => 'ArrayRef', 'default' => sub { [] } );
81             ## use critic
82              
83             sub filename {
84 153     153 1 1524 my ($self) = @_;
85 153 100       4482 if ( exists $self->_cache->{ $self->label } ) {
86 1         16 $log->debug( sprintf $LOG{'CACHE_HIT'}, $self->label );
87 1         47 return $self->_cache->{ $self->label };
88             }
89 152         4454 $log->debug( sprintf $LOG{'CACHE_MISS'}, $self->label );
90              
91             # Return undef if the label could not be matched to a pattern:
92 152 100       4542 if ( !$self->_replug() ) {
93 1         9 $log->warn( sprintf $LOG{'FILE_UNDEF'}, $self->label );
94 1         29 return;
95             }
96 151         4177 my $patterns = $self->_plugs->[0]->pattern_data();
97              
98             # Strip comments to prevent parsing of commands in comments
99             ## no critic qw(RequireDotMatchAnything)
100 151         36513 $patterns =~ s{^$TEX_COMMENT_LINE.*?$}{}gixm;
101             ## use critic
102              
103             # Take care of \input command in TeX:
104 151     1   177953 while ( my ($module) = $patterns =~ /$TEX_INPUT_COMMAND/xmis ) {
  1         11  
  1         3  
  1         17  
105 4         48 $log->debug( $LOG{'PATCH_TEX_INPUT'} );
106 4         54 $module = $PLUGGABLE . ucfirst $module;
107 4         22 my $input_patterns = $module->new()->pattern_data();
108 4         26 $patterns =~ s/$TEX_INPUT_COMMAND/$input_patterns/xmgis;
109             }
110              
111             # Take care of "x encoded carons:
112 151         46569 my $caron = $CARON_ESCAPE . $CLASS_BEGIN . join $EMPTY,
113             keys(%CARON_MAP) . $CLASS_END;
114 151         4030 $log->debug( $LOG{'PATCH_CARONS'} );
115 151         30998 $patterns =~ s{($caron)}{$CARON_MAP{$1}}xmgis;
116              
117             # Take care of \message command in TeX that TeX::Hyphen can't handle:
118             # uncoverable branch true
119 151 50       80658 if ( $patterns =~ /^$TEX_MESSAGE/xmgis ) {
120 0         0 $log->debug( $LOG{'PATCH_TEX_MESSAGE'} ); # uncoverable statement
121             # uncoverable statement
122 0         0 $patterns =~ s{^($TEX_MESSAGE)}{$TEX_COMMENT_LINE$1}xmgis;
123             }
124              
125             # Patch OpenOffice.org pattern data for TeX::Hyphen:
126 151 100       5293 if ( $patterns !~ /\\patterns/xmgis ) {
127 10         62 $log->debug( $LOG{'PATCH_OPENOFFICE'} );
128 10         392 $patterns = $TEX_PATTERN_START . $patterns . $TEX_PATTERN_FINISH;
129             }
130              
131 151         2058 my $fh = File::Temp->new();
132 151         100896 binmode $fh, $UTF8;
133 151         633 $fh->unlink_on_destroy(0);
134              
135             # uncoverable branch true
136 151 50       1419 if ( !print {$fh} $patterns ) {
  151         51211  
137              
138             # uncoverable statement
139 0         0 $log->logdie( sprintf $ERR_CANT_WRITE, ( $fh->filename, $ERRNO ) );
140             }
141 151         524 my %cache = %{ $self->_cache };
  151         5992  
142 151         1510 $cache{ $self->label } = $fh->filename;
143 151         13272 $self->_cache( {%cache} );
144 151         1629 return $fh->filename;
145             }
146              
147             sub available {
148 157     157 1 597 my ($self) = @_;
149 22922         39205 return map { ref }
150 22922         121398 grep { $_->version == $TeX::Hyphen::Pattern::VERSION }
151 157         878 map { $_->new() } $self->_available;
  22922         18618806  
152             }
153              
154             sub packaged {
155 4     4 1 198597 my ($self) = @_;
156 4         31 return $self->_available;
157             }
158              
159             sub _replug {
160 152     152   427 my ($self) = @_;
161 152         4196 my $module = ucfirst $self->label;
162 152         673 $module =~ s/$DASH/$UNDERSCORE/xmgis;
163 152         366 my $label = $module;
164 152         508 $module = $PLUGGABLE . $module;
165              
166             # Find a match with decreasing strictness:
167 152         722 $log->debug( sprintf $LOG{'MATCH_MODULE'}, $module );
168 152         2670 my @available = grep { /^$module$/xmgs } $self->available();
  22192         87924  
169 152 100       2285 if ( !@available ) {
170 2         25 $log->info( sprintf $LOG{'NO_MATCH_CS'}, $module );
171 2         61 @available = grep { /^$module$/xmgis } $self->available();
  292         19811  
172             }
173 152 100       1556694 if ( !@available ) {
174 2         25 $log->warn( sprintf $LOG{'NO_MATCH_CI'}, $module );
175 2         61 @available = grep { /^$module/xmgis } $self->available();
  292         19645  
176             }
177 152 100       577 if ( !@available ) {
178 2         26 $log->warn( sprintf $LOG{'NO_MATCH_PARTIAL'}, $module );
179 2 100       63 if ( exists $FALLBACK{$label} ) {
180 1         10 $module = $PLUGGABLE . $FALLBACK{$label};
181 1         12 @available = grep { /^$module/xmgis } $self->available();
  146         10145  
182             }
183             }
184 152         644 @available = sort @available;
185 152         2018 $log->info( sprintf $LOG{'MATCHES'}, join q{, }, @available );
186 152 100       4595 @available || $log->warn( sprintf $LOG{'NO_MATCH'}, $module );
187 152         439 $self->_plugs( [ map { $_->new() } @available ] );
  151         788  
188 152         13261 return 0 + @available;
189             }
190              
191             sub DESTROY {
192 6     6   1126424 my ($self) = @_;
193 6         58 my @temp_files = values %{ $self->_cache };
  6         312  
194 6         86 $log->debug( sprintf $LOG{'DELETING'},
195             ( 0 + @temp_files, join ', ', @temp_files ) );
196 6         180386 my $deleted = unlink @temp_files;
197             ( $deleted != ( 0 + @temp_files ) )
198             ? $log->warn( $LOG{'DELETE_FAIL'} )
199 6 100       96 : $log->debug( $LOG{'DELETE_SUCCES'} );
200 6         246 return;
201             }
202              
203             1;
204             __END__
205              
206             =encoding utf8
207              
208             =for stopwords Bitbucket CPAN OpenOffice Readonly Subtags Apali tex Ipenburg
209              
210             =head1 NAME
211              
212             TeX::Hyphen::Pattern - class for providing a collection of TeX hyphenation
213             patterns for use with TeX::Hyphen.
214              
215             =head1 VERSION
216              
217             This is version C<v1.1.7>. To prevent plugging in of incompatible modules the
218             version of the pluggable modules must be the same as this module.
219              
220             =head1 SYNOPSIS
221              
222             use TeX::Hyphen;
223             use TeX::Hyphen::Pattern;
224              
225             $pat = TeX::Hyphen::Pattern->new();
226             $pat->label('Sh_ltn'); # Serbocroatian hyphenation patterns
227             $hyph = TeX::Hyphen->new($pat->filename);
228              
229             =head1 DESCRIPTION
230              
231             The L<TeX::Hyphen|TeX::Hyphen> module parses TeX files containing hyphenation
232             patterns for use with TeX based systems. This module includes TeX hyphenation
233             files from L<CPAN|http://www.ctan.org> and hyphenation patterns from
234             L<OpenOffice|http://www.openoffice.org> and provides a single interface to
235             use them in L<TeX::Hyphen|TeX::Hyphen>.
236              
237             =over 4
238              
239             =item L<http://tug.org/svn/texhyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns/>
240              
241             =item L<http://svn.services.openoffice.org/ooo/trunk/dictionaries/>
242              
243             =back
244              
245             =head1 SUBROUTINES/METHODS
246              
247             =over 4
248              
249             =item TeX::Hyphen::Pattern-E<gt>new();
250              
251             =item TeX::Hyphen::Pattern-E<gt>new(label => $label);
252              
253             Constructs a new TeX::Hyphen::Pattern object.
254              
255             =item $pattern-E<gt>label($label);
256              
257             Sets the label that determines the pattern to use. The label can be a simple
258             language code, but since some languages can use multiple scripts with
259             different hyphenation rules we talk about patterns and not just languages.
260              
261             =item $pattern-E<gt>available();
262              
263             Returns a list of the available patterns.
264              
265             =item $pattern-E<gt>packaged();
266              
267             Returns a list of the available patterns. (alias for available)
268              
269             =item $pattern-E<gt>filename();
270              
271             Returns the name of a temporary file that TeX::Hyphen can read it's pattern
272             from for the current label. Returns C<undef> if no pattern language matching the
273             label was found.
274              
275             =back
276              
277             =head1 CONFIGURATION AND ENVIRONMENT
278              
279             The script F<tools/build_catalog_from_ctan.pl> was used to get the TeX
280             patterns file from the source on the internet and include them in this module.
281             After that the copyright messages were manually checked and inserted to make
282             sure this distribution complies with them.
283              
284             =head1 DEPENDENCIES
285              
286             =over 4
287              
288             =item L<Moose|Moose>
289             =item L<Encode|Encode>
290             =item L<File::Temp|File::Temp>
291             =item L<Log::Log4perl|Log::Log4perl>
292             =item L<Module::Pluggable|Module::Pluggable>
293             =item L<Readonly|Readonly>
294             =item L<Set::Scalar|Set::Scalar>
295              
296             =back
297              
298             L<TeX::Hyphen|TeX::Hyphen> is only a test requirement of
299             C<TeX::Hyphen::Pattern>. You might want to use the patterns in another way and
300             this module then just provides them independent of L<TeX::Hyphen|TeX::Hyphen>.
301              
302             =head1 INCOMPATIBILITIES
303              
304             =over 4
305              
306             Not all available pattern files are parsed correctly by
307             L<TeX::Hyphen|TeX::Hyphen>.
308              
309             =back
310              
311             =head1 DIAGNOSTICS
312              
313             This module uses L<Log::Log4perl|Log::Log4perl> for logging. It's a fatal
314             error when the temporary file containing the pattern can't be written.
315              
316             =over 4
317              
318             =item C<Can't write to file '%s', stopped %s>
319              
320             The temporary file created by L<File::Temp|File::Temp> could not be written.
321              
322             =back
323              
324             =head1 BUGS AND LIMITATIONS
325              
326             =over 4
327              
328             =item * Subtags aren't handled: C<en> could pick C<en_US>, C<en_UK> or C<ena>
329             (when Apali would be available) and this is silently ignored, it just does a
330             match on the string and picks what partly matches sorted, so using more exotic
331             scripts this can go wrong badly.
332              
333             =back
334              
335             Please report any bugs or feature requests at
336             L<Bitbucket|
337             https://bitbucket.org/rolandvanipenburg/tex-hyphen-pattern/issues>.
338              
339             =head1 AUTHOR
340              
341             Roland van Ipenburg, E<lt>roland@rolandvanipenburg.comE<gt>
342              
343             =head1 LICENSE AND COPYRIGHT
344              
345             Copyright 2009-2021 by Roland van Ipenburg
346              
347             This library is free software; you can redistribute it and/or modify
348             it under the same terms as Perl itself, either Perl version 5.10.0 or,
349             at your option, any later version of Perl 5 you may have available.
350              
351             The included pattern files in lib/TeX/Hyphen/Pattern/ are licensed as stated
352             in those files.
353              
354             =head1 DISCLAIMER OF WARRANTY
355              
356             BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
357             FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
358             OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
359             PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
360             EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
361             WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
362             ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH
363             YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
364             NECESSARY SERVICING, REPAIR, OR CORRECTION.
365              
366             IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
367             WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
368             REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENSE, BE
369             LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL,
370             OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE
371             THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
372             RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
373             FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
374             SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
375             SUCH DAMAGES.
376              
377             =cut