File Coverage

blib/lib/TeX/Hyphen/Pattern.pm
Criterion Covered Total %
statement 106 109 100.0
branch 20 22 100.0
condition n/a
subroutine 16 16 100.0
pod 3 3 100.0
total 145 150 100.0


line stmt bran cond sub pod time code
1             # -*- cperl; cperl-indent-level: 4 -*-
2             # Copyright (C) 2009-2021, Roland van Ipenburg
3             package TeX::Hyphen::Pattern v1.1.6;
4 4     4   393508 use Moose;
  4         1601882  
  4         25  
5 4     4   24343 use 5.014000;
  4         13  
6 4     4   21 use utf8;
  4         7  
  4         35  
7              
8 4     4   2535 use English '-no_match_vars';
  4         13918  
  4         20  
9 4     4   4592 use Log::Log4perl qw(:easy get_logger);
  4         168091  
  4         20  
10 4     4   4425 use Set::Scalar ();
  4         36983  
  4         86  
11 4     4   969 use Encode ();
  4         25214  
  4         127  
12             use Module::Pluggable
13 4         27 'sub_name' => '_available',
14             'search_path' => ['TeX::Hyphen::Pattern'],
15 4     4   1706 'require' => 1;
  4         27746  
16              
17 4     4   2868 use File::Temp ();
  4         56579  
  4         95  
18              
19 4     4   1369 use Readonly ();
  4         9821  
  4         4670  
20             ## no critic (ProhibitCallsToUnexportedSubs)
21             Readonly::Scalar my $EMPTY => q{};
22             Readonly::Scalar my $DASH => q{-};
23             Readonly::Scalar my $UNDERSCORE => q{_};
24             Readonly::Scalar my $TEX_COMMENT_LINE => q{%};
25             Readonly::Scalar my $CARON_ESCAPE => q{"};
26             Readonly::Scalar my $CLASS_BEGIN => q{[};
27             Readonly::Scalar my $CLASS_END => q{]};
28             Readonly::Scalar my $DEFAULT_LABEL => q{en-US};
29             Readonly::Scalar my $UTF8 => q{:utf8};
30             Readonly::Scalar my $PLUGGABLE => q{TeX::Hyphen::Pattern::};
31             Readonly::Scalar my $TEX_PATTERN_START => qq@\\patterns{\n#@;
32             Readonly::Scalar my $TEX_PATTERN_FINISH => qq@\n}@;
33             Readonly::Scalar my $TEX_INPUT_COMMAND => q{\\\input\s+hyph-(.*?)\.tex};
34             Readonly::Scalar my $TEX_MESSAGE => q{\\\message};
35              
36             Readonly::Scalar my $ERR_CANT_WRITE => q{Can't write to file '%s', stopped %s};
37              
38             Readonly::Hash my %FALLBACK => (
39             'De_DE' => q{De_1996_ec},
40             'Af_za' => q{Af_ec},
41             'Da_DK' => q{Da_ec},
42             'Et_ee' => q{Et_ec},
43             'Fr_fr' => q{Fr_ec},
44             'It_it' => q{It},
45             'Lt_lt' => q{Lt},
46             'Nl_nl' => q{Nl},
47             'Pl_pl' => q{Pl},
48             'Pt_br' => q{Pt},
49             'Sh' => q{Sh_latn},
50             'Sl' => q{Sl},
51             );
52              
53             Readonly::Hash my %LOG => (
54             'MATCH_MODULE' => q{Looking for a match for '%s'},
55             'NO_MATCH_CS' => q{No case sensitive pattern match found for '%s'},
56             'NO_MATCH_CI' => q{No case insensitive pattern match found for '%s'},
57             'NO_MATCH_PARTIAL' => q{No partial pattern match found for '%s'},
58             'NO_MATCH' => q{No pattern match found for '%s'},
59             'MATCHES' => q{Pattern match(es) found '%s'},
60             'CACHE_HIT' => q{Cache hit for '%s'},
61             'CACHE_MISS' => q{Cache miss for '%s'},
62             'FILE_UNDEF' => q{Returning undef file for '%s'},
63             'PATCH_OPENOFFICE' => q{Patching OpenOffice.org pattern},
64             'PATCH_TEX_INPUT' => q{Patching TeX pattern with \input},
65             'PATCH_CARONS' => q{Patching "x encoded carons},
66             'PATCH_TEX_MESSAGE' => q{Patching TeX pattern with \message},
67             'DELETING' => q{Deleting %d temporary file(s) %s},
68             'DELETE_FAIL' => q{Could not delete all temporary files},
69             'DELETE_SUCCES' => q{Deleted all temporary files},
70             );
71             Readonly::Hash my %CARON_MAP => ( q{c} => q{č}, q{s} => q{š}, q{z} => q{ž} );
72             ## use critic
73              
74             Log::Log4perl->easy_init($ERROR);
75             my $log = get_logger();
76              
77             ## no critic (ProhibitCallsToUndeclaredSubs)
78             has 'label' => ( 'is' => 'rw', 'isa' => 'Str', 'default' => $DEFAULT_LABEL );
79             has '_cache' => ( 'is' => 'rw', 'isa' => 'HashRef', 'default' => sub { {} } );
80             has '_plugs' => ( 'is' => 'rw', 'isa' => 'ArrayRef', 'default' => sub { [] } );
81             ## use critic
82              
83             sub filename {
84 153     153 1 1289 my ($self) = @_;
85 153 100       4362 if ( exists $self->_cache->{ $self->label } ) {
86 1         16 $log->debug( sprintf $LOG{'CACHE_HIT'}, $self->label );
87 1         50 return $self->_cache->{ $self->label };
88             }
89 152         3863 $log->debug( sprintf $LOG{'CACHE_MISS'}, $self->label );
90              
91             # Return undef if the label could not be matched to a pattern:
92 152 100       4242 if ( !$self->_replug() ) {
93 54         1563 $log->warn( sprintf $LOG{'FILE_UNDEF'}, $self->label );
94 54         1246 return;
95             }
96 98         2413 my $patterns = $self->_plugs->[0]->pattern_data();
97              
98             # Strip comments to prevent parsing of commands in comments
99             ## no critic qw(RequireDotMatchAnything)
100 98         16205 $patterns =~ s{^$TEX_COMMENT_LINE.*?$}{}gixm;
101             ## use critic
102              
103             # Take care of \input command in TeX:
104 98     1   111652 while ( my ($module) = $patterns =~ /$TEX_INPUT_COMMAND/xmis ) {
  1         7  
  1         1  
  1         11  
105 4         47 $log->debug( $LOG{'PATCH_TEX_INPUT'} );
106 4         54 $module = $PLUGGABLE . ucfirst $module;
107 4         21 my $input_patterns = $module->new()->pattern_data();
108 4         24 $patterns =~ s/$TEX_INPUT_COMMAND/$input_patterns/xmgis;
109             }
110              
111             # Take care of "x encoded carons:
112 98         40005 my $caron = $CARON_ESCAPE . $CLASS_BEGIN . join $EMPTY,
113             keys(%CARON_MAP) . $CLASS_END;
114 98         2393 $log->debug( $LOG{'PATCH_CARONS'} );
115 98         16321 $patterns =~ s{($caron)}{$CARON_MAP{$1}}xmgis;
116              
117             # Take care of \message command in TeX that TeX::Hyphen can't handle:
118             # uncoverable branch true
119 98 50       47545 if ( $patterns =~ /^$TEX_MESSAGE/xmgis ) {
120 0         0 $log->debug( $LOG{'PATCH_TEX_MESSAGE'} ); # uncoverable statement
121             # uncoverable statement
122 0         0 $patterns =~ s{^($TEX_MESSAGE)}{$TEX_COMMENT_LINE$1}xmgis;
123             }
124              
125             # Patch OpenOffice.org pattern data for TeX::Hyphen:
126 98 100       1436 if ( $patterns !~ /\\patterns/xmgis ) {
127 4         19 $log->debug( $LOG{'PATCH_OPENOFFICE'} );
128 4         45 $patterns = $TEX_PATTERN_START . $patterns . $TEX_PATTERN_FINISH;
129             }
130              
131 98         1191 my $fh = File::Temp->new();
132 98         63149 binmode $fh, $UTF8;
133 98         440 $fh->unlink_on_destroy(0);
134              
135             # uncoverable branch true
136 98 50       798 if ( !print {$fh} $patterns ) {
  98         28739  
137              
138             # uncoverable statement
139 0         0 $log->logdie( sprintf $ERR_CANT_WRITE, ( $fh->filename, $ERRNO ) );
140             }
141 98         307 my %cache = %{ $self->_cache };
  98         3639  
142 98         795 $cache{ $self->label } = $fh->filename;
143 98         5806 $self->_cache( {%cache} );
144 98         706 return $fh->filename;
145             }
146              
147             sub available {
148 264     264 1 880 my ($self) = @_;
149 24552         37979 return map { ref }
150 38544         182617 grep { $_->version == $TeX::Hyphen::Pattern::VERSION }
151 264         1629 map { $_->new() } $self->_available;
  38544         28968752  
152             }
153              
154             sub packaged {
155 4     4 1 156028 my ($self) = @_;
156 4         23 return $self->_available;
157             }
158              
159             sub _replug {
160 152     152   429 my ($self) = @_;
161 152         3682 my $module = ucfirst $self->label;
162 152         759 $module =~ s/$DASH/$UNDERSCORE/xmgis;
163 152         365 my $label = $module;
164 152         430 $module = $PLUGGABLE . $module;
165              
166             # Find a match with decreasing strictness:
167 152         678 $log->debug( sprintf $LOG{'MATCH_MODULE'}, $module );
168 152         2482 my @available = grep { /^$module$/xmgs } $self->available();
  14136         65134  
169 152 100       1602 if ( !@available ) {
170 55         511541 $log->info( sprintf $LOG{'NO_MATCH_CS'}, $module );
171 55         1594 @available = grep { /^$module$/xmgis } $self->available();
  5115         32668  
172             }
173 152 100       929234 if ( !@available ) {
174 55         508739 $log->warn( sprintf $LOG{'NO_MATCH_CI'}, $module );
175 55         1664 @available = grep { /^$module/xmgis } $self->available();
  5115         31893  
176             }
177 152 100       922 if ( !@available ) {
178 54         497346 $log->warn( sprintf $LOG{'NO_MATCH_PARTIAL'}, $module );
179 54 100       1603 if ( exists $FALLBACK{$label} ) {
180 2         22 $module = $PLUGGABLE . $FALLBACK{$label};
181 2         19 @available = grep { /^$module/xmgis } $self->available();
  186         10253  
182             }
183             }
184 152         10492 @available = sort @available;
185 152         10845 $log->info( sprintf $LOG{'MATCHES'}, join q{, }, @available );
186 152 100       3582 @available || $log->warn( sprintf $LOG{'NO_MATCH'}, $module );
187 152         2622 $self->_plugs( [ map { $_->new() } @available ] );
  99         612  
188 152         8030 return 0 + @available;
189             }
190              
191             sub DESTROY {
192 6     6   1118770 my ($self) = @_;
193 6         20 my @temp_files = values %{ $self->_cache };
  6         280  
194 6         70 $log->debug( sprintf $LOG{'DELETING'},
195             ( 0 + @temp_files, join ', ', @temp_files ) );
196 6         3963 my $deleted = unlink @temp_files;
197             ( $deleted != ( 0 + @temp_files ) )
198             ? $log->warn( $LOG{'DELETE_FAIL'} )
199 6 100       60 : $log->debug( $LOG{'DELETE_SUCCES'} );
200 6         268 return;
201             }
202              
203             1;
204             __END__
205              
206             =encoding utf8
207              
208             =for stopwords Bitbucket CPAN OpenOffice Readonly Subtags Apali tex Ipenburg
209              
210             =head1 NAME
211              
212             TeX::Hyphen::Pattern - class for providing a collection of TeX hyphenation
213             patterns for use with TeX::Hyphen.
214              
215             =head1 VERSION
216              
217             This is version C<v1.1.6>. To prevent plugging in of incompatible modules the
218             version of the pluggable modules must be the same as this module.
219              
220             =head1 SYNOPSIS
221              
222             use TeX::Hyphen;
223             use TeX::Hyphen::Pattern;
224              
225             $pat = TeX::Hyphen::Pattern->new();
226             $pat->label('Sh_ltn'); # Serbocroatian hyphenation patterns
227             $hyph = TeX::Hyphen->new($pat->filename);
228              
229             =head1 DESCRIPTION
230              
231             The L<TeX::Hyphen|TeX::Hyphen> module parses TeX files containing hyphenation
232             patterns for use with TeX based systems. This module includes TeX hyphenation
233             files from L<CPAN|http://www.ctan.org> and hyphenation patterns from
234             L<OpenOffice|http://www.openoffice.org> and provides a single interface to
235             use them in L<TeX::Hyphen|TeX::Hyphen>.
236              
237             =over 4
238              
239             =item L<http://tug.org/svn/texhyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns/>
240              
241             =item L<http://svn.services.openoffice.org/ooo/trunk/dictionaries/>
242              
243             =back
244              
245             =head1 SUBROUTINES/METHODS
246              
247             =over 4
248              
249             =item TeX::Hyphen::Pattern-E<gt>new();
250              
251             =item TeX::Hyphen::Pattern-E<gt>new(label => $label);
252              
253             Constructs a new TeX::Hyphen::Pattern object.
254              
255             =item $pattern-E<gt>label($label);
256              
257             Sets the label that determines the pattern to use. The label can be a simple
258             language code, but since some languages can use multiple scripts with
259             different hyphenation rules we talk about patterns and not just languages.
260              
261             =item $pattern-E<gt>available();
262              
263             Returns a list of the available patterns.
264              
265             =item $pattern-E<gt>packaged();
266              
267             Returns a list of the available patterns. (alias for available)
268              
269             =item $pattern-E<gt>filename();
270              
271             Returns the name of a temporary file that TeX::Hyphen can read it's pattern
272             from for the current label. Returns C<undef> if no pattern language matching the
273             label was found.
274              
275             =back
276              
277             =head1 CONFIGURATION AND ENVIRONMENT
278              
279             The script F<tools/build_catalog_from_ctan.pl> was used to get the TeX
280             patterns file from the source on the internet and include them in this module.
281             After that the copyright messages were manually checked and inserted to make
282             sure this distribution complies with them.
283              
284             =head1 DEPENDENCIES
285              
286             =over 4
287              
288             =item L<Moose|Moose>
289             =item L<Encode|Encode>
290             =item L<File::Temp|File::Temp>
291             =item L<Log::Log4perl|Log::Log4perl>
292             =item L<Module::Pluggable|Module::Pluggable>
293             =item L<Readonly|Readonly>
294             =item L<Set::Scalar|Set::Scalar>
295              
296             =back
297              
298             L<TeX::Hyphen|TeX::Hyphen> is only a test requirement of
299             C<TeX::Hyphen::Pattern>. You might want to use the patterns in another way and
300             this module then just provides them independent of L<TeX::Hyphen|TeX::Hyphen>.
301              
302             =head1 INCOMPATIBILITIES
303              
304             =over 4
305              
306             Not all available pattern files are parsed correctly by
307             L<TeX::Hyphen|TeX::Hyphen>.
308              
309             =back
310              
311             =head1 DIAGNOSTICS
312              
313             This module uses L<Log::Log4perl|Log::Log4perl> for logging. It's a fatal
314             error when the temporary file containing the pattern can't be written.
315              
316             =over 4
317              
318             =item C<Can't write to file '%s', stopped %s>
319              
320             The temporary file created by L<File::Temp|File::Temp> could not be written.
321              
322             =back
323              
324             =head1 BUGS AND LIMITATIONS
325              
326             =over 4
327              
328             =item * Subtags aren't handled: C<en> could pick C<en_US>, C<en_UK> or C<ena>
329             (when Apali would be available) and this is silently ignored, it just does a
330             match on the string and picks what partly matches sorted, so using more exotic
331             scripts this can go wrong badly.
332              
333             =back
334              
335             Please report any bugs or feature requests at
336             L<Bitbucket|
337             https://bitbucket.org/rolandvanipenburg/tex-hyphen-pattern/issues>.
338              
339             =head1 AUTHOR
340              
341             Roland van Ipenburg, E<lt>roland@rolandvanipenburg.comE<gt>
342              
343             =head1 LICENSE AND COPYRIGHT
344              
345             Copyright 2009-2021 by Roland van Ipenburg
346              
347             This library is free software; you can redistribute it and/or modify
348             it under the same terms as Perl itself, either Perl version 5.10.0 or,
349             at your option, any later version of Perl 5 you may have available.
350              
351             The included pattern files in lib/TeX/Hyphen/Pattern/ are licensed as stated
352             in those files.
353              
354             =head1 DISCLAIMER OF WARRANTY
355              
356             BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
357             FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
358             OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
359             PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
360             EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
361             WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
362             ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH
363             YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
364             NECESSARY SERVICING, REPAIR, OR CORRECTION.
365              
366             IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
367             WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
368             REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENSE, BE
369             LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL,
370             OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE
371             THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
372             RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
373             FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
374             SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
375             SUCH DAMAGES.
376              
377             =cut