File Coverage

blib/lib/TeX/Hyphen.pm
Criterion Covered Total %
statement 144 155 92.9
branch 50 68 73.5
condition 14 15 93.3
subroutine 13 14 92.8
pod 0 5 0.0
total 221 257 85.9


line stmt bran cond sub pod time code
1              
2             package TeX::Hyphen;
3              
4             =head1 NAME
5              
6             TeX::Hyphen -- hyphenate words using TeX's patterns
7              
8             =head1 SYNOPSIS
9              
10             use TeX::Hyphen;
11             my $hyp = new TeX::Hyphen 'file' => 'hyphen.tex',
12             'style' => 'czech', leftmin => 2,
13             rightmin => 2;
14              
15             my $hyp = new TeX::Hyphen 'file' => 'hyphen.tex',
16             'style' => 'utf8';
17              
18             # my $hyp = new TeX::Hyphen "hyphen.tex";
19              
20             my $word = "representation";
21             my @points = $hyp->hyphenate($word);
22             print $hyp->visualize($word), "\n";
23              
24             =head1 DESCRIPTION
25              
26             Constructor new() creates a new Hyphen object and loads the file with
27             patterns into memory. Then you can ask it for hyphenation of a word by
28             calling a method of this object. If no file is specified, the default
29             Donald E. Knuth's F, that is included in this module, is
30             used instead.
31              
32             =head2 Arguments to constructor
33              
34             You can pass arguments to the new() call as hash, possible options are
35              
36             =over 4
37              
38             =item file
39              
40             Name of the file with the patters. It will be loaded and the resulting
41             object will be able to hyphenate according to patterns in that file.
42              
43             For convenience and backward compatibility, the file name can also be
44             specified as the first (odd) parameter to new().
45              
46             =item style
47              
48             Various languages use special shortcuts to specify the patterns.
49             Instead of doing the full TeX expansion, we use Perl code to parse the
50             patterns. The style option loads TeX::Hyphen::name_of_the_style module
51             and uses the parsing functions found in it.
52              
53             Currently, the default czech (which also works for English alright),
54             german, and utf8 are available. See the TeX::Hyphen::czech man page
55             for more information, especially if you want to support other
56             languages/styles.
57              
58             =item leftmin
59              
60             The minimum starting substring which will not be hyphenated. This
61             overrides the default specified in the style file.
62              
63             =item rightmin
64              
65             The minimum ending substring which will not be hyphenated. This
66             overrides the default specified in the style file.
67              
68             =back
69              
70             =head2 Methods that are supported
71              
72             Method hyphenate() returns list of places where the word can be
73             divided, so
74              
75             $hyp->visualize('representation')
76              
77             returns list (3, 5, 8, 10).
78              
79             Method visualize() can be used to show these points, so
80              
81             $hyp->visualize('representation')
82            
83             should return C, at least for English patterns.
84              
85             Variables I<$TeX::Hyphen::LEFTMIN> and I<$TeX::Hyphen::RIGHTMIN> can
86             be used to restrict minimal starting and ending substring where it is
87             not possible to hyphenate. They both default to 2 but should be
88             changed to match the paratemers used to generate the patterns.
89              
90             Variable I<$TeX::Hyphen::DEBUG> can be set to see some statistics and
91             processing.
92              
93             The file with hyphenation patterns may contain C<\'> and C<\v> accents,
94             used in the Czech (and other) languages.
95              
96             =cut
97              
98 2     2   5748 use strict;
  2         3  
  2         58  
99 2     2   6 use vars qw( $VERSION $DEBUG $LEFTMIN $RIGHTMIN $errstr );
  2         2  
  2         2076  
100              
101             $VERSION = '1.16';
102 0     0 0 0 sub Version () { $VERSION; }
103              
104             $DEBUG ||= 0;
105              
106             # To protect beginning and end of the word from hyphenation
107             $LEFTMIN = 2;
108             $RIGHTMIN = 2;
109              
110             my (@DATA, $DATA_LOADED);
111              
112             # #############################################################
113             # Constructor. Parameter specifies file with patterns.
114             # File is searched for \patterns{ ... } and \hyphenation{ ... }
115             # sections and these are used.
116             #
117             sub new {
118 8     8 0 956 my $class = shift;
119 8         9 my ($file, %opts);
120 8 100       23 if (scalar(@_) % 2) {
121 3         5 $file = shift;
122 3         6 %opts = @_;
123             } else {
124 5         8 %opts = @_;
125 5         8 $file = $opts{'file'};
126             }
127 8         16 local *FILE;
128 8 100       17 if (not defined $file) {
129 5 100       10 if (not defined $DATA_LOADED) {
130 2         2586 @DATA = ;
131 2         138 $DATA_LOADED = 1;
132             }
133             } else {
134 3 50       90 open FILE, $file or do {
135 0         0 $errstr = "Error opening file `$file': $!";
136 0         0 return;
137             };
138             }
139 8         11 my $self = {};
140 8         16 bless $self, $class;
141              
142 8         25 local ($/) = "\n";
143 8         5 my ($tag, $value);
144 8         9 my $hyphen = {};
145 8         6 my $beginhyphen = {};
146 8         9 my $endhyphen = {};
147 8         7 my $bothhyphen = {};
148 8         6 my $exception = {};
149              
150 8         6 my ($process_patterns, $process_hyphenation);
151 8         10 my ($leftmin, $rightmin) = ($LEFTMIN, $RIGHTMIN);
152 8 100       23 if (not defined $opts{'style'}) {
153 7         9 $opts{'style'} = 'czech'; # for backward compatibility
154             }
155 8 50       17 if (defined $opts{'style'}) {
156 8 100       15 if ($opts{'style'} eq 'utf8') {
157 1         4 binmode(FILE,':utf8');
158             }
159 2     2   827 eval qq!use ${class}::$opts{'style'}!;
  2     1   3  
  2     1   25  
  1     1   5  
  1     1   1  
  1     1   10  
  1     1   4  
  1         1  
  1         7  
  1         5  
  1         2  
  1         9  
  1         10  
  1         2  
  1         18  
  1         7  
  1         1  
  1         11  
  1         357  
  1         1  
  1         13  
  8         542  
160 8 50       20 if (not $@) {
161 8         360 eval "\$process_patterns = \\&${class}::$opts{'style'}::process_patterns";
162 8         271 eval "\$process_hyphenation = \\&${class}::$opts{'style'}::process_hyphenation";
163 8         249 eval "\$leftmin = \$${class}::$opts{'style'}::LEFTMIN";
164 8         226 eval "\$rightmin = \$${class}::$opts{'style'}::RIGHTMIN";
165             } else {
166 0         0 $errstr = "Error loading style module $class::$opts{'style'}: $@";
167 0         0 return;
168             }
169             }
170 8 50       38 $leftmin = $opts{leftmin} if exists $opts{leftmin};
171 8 100       18 $rightmin = $opts{rightmin} if exists $opts{rightmin};
172              
173 8         10 my ($in_patterns, $in_hyphenation) = (0, 0);
174 8         8 my $i = 0;
175 8   100     80 while ((defined $file and defined($_ = ))
      100        
      66        
176             or (not defined $file and defined($_ = $DATA[$i++]))) {
177 22347         16350 s/\%.*$//; # comment out
178 22347         15215 chomp;
179 22347 100       18629 if ($in_patterns) {
    100          
    100          
    100          
180 22254         27294 $in_patterns = $process_patterns->($_,
181             $bothhyphen, $beginhyphen,
182             $endhyphen, $hyphen);
183             } elsif ($in_hyphenation) {
184 75         93 $in_hyphenation = $process_hyphenation->($_, $exception);
185             } elsif (/\\patterns{/) { # find the \patterns section
186 8         30 $in_patterns = 1;
187             } elsif (/\\hyphenation{/) {
188 5         20 $in_hyphenation = 1;
189             }
190             }
191 8 100       45 close FILE if defined $file;
192 8         55 $self->{hyphen} = $hyphen;
193 8         13 $self->{begin} = $beginhyphen;
194 8         7 $self->{end} = $endhyphen;
195 8         7 $self->{both} = $bothhyphen;
196 8         11 $self->{exception} = $exception;
197 8 0       12 print STDERR 'Statistics for ', (defined $file ? $file : 'hyphen.tex'),
    50          
198             ': all ' , scalar %$hyphen,
199             ' (', scalar keys %$hyphen,
200             '), exception ', scalar %$exception,
201             ' (', scalar keys %$exception,
202             "),\n\tbegin ", scalar %$beginhyphen,
203             ' (', scalar keys %$beginhyphen,
204             '), end ', scalar %$endhyphen,
205             ' (', scalar keys %$endhyphen,
206             '), both ', scalar %$bothhyphen,
207             ' (', scalar keys %$bothhyphen, ")\n" if $DEBUG;
208            
209 8         48 $self->{exact} = { %$exception };
210 8         15 $self->{leftmin} = $leftmin;
211 8         13 $self->{rightmin} = $rightmin;
212 8         53 $self;
213             }
214              
215             # ############################################
216             # For given word finds places for hyphenation.
217             # Returns an array specifying the places.
218             #
219             sub hyphenate {
220 1390     1390 0 4610 my ($self, $word) = (shift, shift);
221              
222 1390 50       1577 print STDERR "Hyphenate `$word'\n" if $DEBUG;
223            
224 1390         1142 my $exact = $self->{exact};
225 1390 100       1654 if (defined(my $res = $exact->{$word})) {
226 3 50       5 print STDERR "Exact match $res\n" if $DEBUG;
227 3         5 return $self->make_result_list($res);
228             }
229              
230 1387         942 my $hyphen = $self->{hyphen};
231 1387         787 my $beginhyphen = $self->{begin};
232 1387         954 my $endhyphen = $self->{end};
233 1387         823 my $bothhyphen = $self->{both};
234              
235 1387         879 my $totallength = length $word;
236 1387         1964 my @result = (0) x ($totallength + 1);
237              
238             # walk the word
239 1387         1141 my $rightstop = $totallength - $self->{rightmin};
240 1387         816 my $pos;
241 1387         1708 for ($pos = 0; $pos <= $rightstop; $pos++) {
242             # length of the rest of the word
243 4471         2846 my $restlength = $totallength - $pos;
244             # length of a substring
245 4471         2185 my $length;
246 4471         4972 for ($length = 1; $length <= $restlength; $length++) {
247 20257         14750 my $substr = substr $word, $pos, $length;
248 20257         9834 my $value;
249             my $j;
250 0         0 my $letter;
251 20257 100       26418 if (defined($value = $hyphen->{$substr})) {
252 2430         1369 $j = $pos;
253 2430 50       2556 print STDERR "$j: $substr: $value\n" if $DEBUG > 2;
254 2430         4230 while ($value =~ /(.)/gs) {
255 7052 100       10616 $result[$j] = $1 if ($1 > $result[$j]);
256 7052         9411 $j++;
257             }
258             }
259 20257 100 100     30984 if (($pos == 0) and
260             defined($value = $beginhyphen->{$substr})) {
261 236         153 $j = 0;
262 236 50       250 print STDERR "$j: .$substr: $value\n" if $DEBUG > 2;
263 236         445 while ($value =~ /(.)/gs) {
264 815 100       1184 $result[$j] = $1 if ($1 > $result[$j]);
265 815         1086 $j++;
266             }
267             }
268 20257 100 100     44966 if (($restlength == $length) and
269             defined($value = $endhyphen->{$substr})) {
270 222         138 $j = $pos;
271 222 50       245 print STDERR "$j: $substr.: $value\n" if $DEBUG > 2;
272 222         410 while ($value =~ /(.)/gs) {
273 477 100       790 $result[$j] = $1 if ($1 > $result[$j]);
274 477         951 $j++;
275             }
276             }
277             }
278             }
279 1387         726 my $value;
280             my $letter;
281 1387 50       1625 if (defined($value = $bothhyphen->{$word})) {
282 0         0 my $j = 0;
283 0 0       0 print STDERR "$j: .$word.: $value\n" if $DEBUG > 2;
284 0         0 while ($value =~ /(.)/gs) {
285 0 0       0 $result[$j] = $1 if ($1 > $result[$j]);
286 0         0 $j++;
287             }
288             }
289              
290 1387         1982 my $result = join '', @result;
291             ### substr($result, 0, $self->{leftmin} + 1) = '0' x ($self->{leftmin} + 1);
292 1387         1596 substr($result, 0, $self->{leftmin}) = '0' x $self->{leftmin};
293 1387         1255 substr($result, -$self->{rightmin}) = '0' x $self->{rightmin};
294              
295 1387 50       1570 print STDERR "Result: $result\n" if $DEBUG;
296 1387         1446 return $self->make_result_list($result);
297             }
298              
299             # ####################
300             #
301             #
302             sub make_result_list {
303 1390     1390 0 1025 my ($self, $result) = @_;
304 1390         941 my @result = ();
305 1390         890 my $i = 0;
306 1390         2801 while ($result =~ /(.)/g) {
307 7280 100       9073 push @result, $i if (int($1) % 2);
308 7280         9702 $i++;
309             }
310 1390         2763 @result;
311             }
312              
313             # #########################################
314             # For a word show the result of hyphenation
315             #
316             sub visualize {
317 17     17 0 273 my ($self, $word) = (shift, shift);
318 17         7 my $number = 0;
319 17         42 my $pos;
320 17         20 for $pos ($self->hyphenate($word)) {
321 29         33 substr($word, $pos + $number, 0) = "-";
322 29         22 $number++;
323             }
324 17         29 $word;
325             }
326              
327             =head1 VERSION
328              
329             1.16
330              
331             =head1 AVAILABLE FROM
332              
333             http://www.adelton.com/perl/TeX-Hyphen/
334              
335             =head1 AUTHOR
336              
337             (c) 1997--2015 Jan Pazdziora.
338              
339             All rights reserved. This package is free software; you can
340             redistribute it and/or modify it under the same terms as Perl itself.
341              
342             Contact the author at jpx dash perl at adelton dot com.
343              
344             =head1 SEE ALSO
345              
346             perl(1), TeX::Hyphen::czech.
347              
348             =cut
349              
350             1;
351              
352             __DATA__