File Coverage

blib/lib/TeX/Hyphen.pm
Criterion Covered Total %
statement 144 155 92.9
branch 50 68 73.5
condition 14 15 93.3
subroutine 13 14 92.8
pod 0 5 0.0
total 221 257 85.9


line stmt bran cond sub pod time code
1              
2             package TeX::Hyphen;
3              
4             =head1 NAME
5              
6             TeX::Hyphen -- hyphenate words using TeX's patterns
7              
8             =head1 SYNOPSIS
9              
10             use TeX::Hyphen;
11             my $hyp = new TeX::Hyphen 'file' => 'hyphen.tex',
12             'style' => 'czech', leftmin => 2,
13             rightmin => 2;
14              
15             my $hyp = new TeX::Hyphen 'file' => 'hyphen.tex',
16             'style' => 'utf8';
17              
18             # my $hyp = new TeX::Hyphen "hyphen.tex";
19              
20             my $word = "representation";
21             my @points = $hyp->hyphenate($word);
22             print $hyp->visualize($word), "\n";
23              
24             =head1 DESCRIPTION
25              
26             Constructor new() creates a new Hyphen object and loads the file with
27             patterns into memory. Then you can ask it for hyphenation of a word by
28             calling a method of this object. If no file is specified, the default
29             Donald E. Knuth's F, that is included in this module, is
30             used instead.
31              
32             =head2 Arguments to constructor
33              
34             You can pass arguments to the new() call as hash, possible options are
35              
36             =over 4
37              
38             =item file
39              
40             Name of the file with the patters. It will be loaded and the resulting
41             object will be able to hyphenate according to patterns in that file.
42              
43             For convenience and backward compatibility, the file name can also be
44             specified as the first (odd) parameter to new().
45              
46             =item style
47              
48             Various languages use special shortcuts to specify the patterns.
49             Instead of doing the full TeX expansion, we use Perl code to parse the
50             patterns. The style option loads TeX::Hyphen::name_of_the_style module
51             and uses the parsing functions found in it.
52              
53             Currently, the default czech (which also works for English alright),
54             german, and utf8 are available. See the TeX::Hyphen::czech man page
55             for more information, especially if you want to support other
56             languages/styles.
57              
58             =item leftmin
59              
60             The minimum starting substring which will not be hyphenated. This
61             overrides the default specified in the style file.
62              
63             =item rightmin
64              
65             The minimum ending substring which will not be hyphenated. This
66             overrides the default specified in the style file.
67              
68             =back
69              
70             =head2 Methods that are supported
71              
72             Method hyphenate() returns list of places where the word can be
73             divided, so
74              
75             $hyp->hyphenate('representation')
76              
77             returns list (3, 5, 8, 10).
78              
79             Method visualize() can be used to show these points, so
80              
81             $hyp->visualize('representation')
82            
83             should return C, at least for English patterns.
84              
85             Variables I<$TeX::Hyphen::LEFTMIN> and I<$TeX::Hyphen::RIGHTMIN> can
86             be used to restrict minimal starting and ending substring where it is
87             not possible to hyphenate. They both default to 2 but should be
88             changed to match the paratemers used to generate the patterns.
89              
90             Variable I<$TeX::Hyphen::DEBUG> can be set to see some statistics and
91             processing.
92              
93             The file with hyphenation patterns may contain C<\'> and C<\v> accents,
94             used in the Czech (and other) languages.
95              
96             =cut
97              
98 2     2   8473 use strict;
  2         4  
  2         83  
99 2     2   7 use vars qw( $VERSION $DEBUG $LEFTMIN $RIGHTMIN $errstr );
  2         2  
  2         2860  
100              
101             $VERSION = '1.18';
102 0     0 0 0 sub Version () { $VERSION; }
103              
104             $DEBUG ||= 0;
105              
106             # To protect beginning and end of the word from hyphenation
107             $LEFTMIN = 2;
108             $RIGHTMIN = 2;
109              
110             my (@DATA, $DATA_LOADED);
111              
112             # #############################################################
113             # Constructor. Parameter specifies file with patterns.
114             # File is searched for \patterns{ ... } and \hyphenation{ ... }
115             # sections and these are used.
116             #
117             sub new {
118 8     8 0 3790 my $class = shift;
119 8         17 my ($file, %opts);
120 8 100       41 if (scalar(@_) % 2) {
121 3         9 $file = shift;
122 3         12 %opts = @_;
123             } else {
124 5         20 %opts = @_;
125 5         28 $file = $opts{'file'};
126             }
127 8         39 local *FILE;
128 8 100       31 if (not defined $file) {
129 5 100       19 if (not defined $DATA_LOADED) {
130 2         5268 @DATA = ;
131 2         214 $DATA_LOADED = 1;
132             }
133             } else {
134 3 50       220 open FILE, $file or do {
135 0         0 $errstr = "Error opening file `$file': $!";
136 0         0 return;
137             };
138             }
139 8         26 my $self = {};
140 8         28 bless $self, $class;
141              
142 8         58 local ($/) = "\n";
143 8         13 my ($tag, $value);
144 8         29 my $hyphen = {};
145 8         13 my $beginhyphen = {};
146 8         13 my $endhyphen = {};
147 8         14 my $bothhyphen = {};
148 8         16 my $exception = {};
149              
150 8         14 my ($process_patterns, $process_hyphenation);
151 8         23 my ($leftmin, $rightmin) = ($LEFTMIN, $RIGHTMIN);
152 8 100       42 if (not defined $opts{'style'}) {
153 7         19 $opts{'style'} = 'czech'; # for backward compatibility
154             }
155 8 50       35 if (defined $opts{'style'}) {
156 8 100       36 if ($opts{'style'} eq 'utf8') {
157 1         12 binmode(FILE,':utf8');
158             }
159 2     2   1679 eval qq!use ${class}::$opts{'style'}!;
  2     1   4  
  2     1   43  
  1     1   14  
  1     1   3  
  1     1   31  
  1     1   10  
  1         3  
  1         22  
  1         11  
  1         6  
  1         50  
  1         11  
  1         2  
  1         17  
  1         15  
  1         3  
  1         33  
  1         1059  
  1         5  
  1         40  
  8         1231  
160 8 50       34 if (not $@) {
161 8         823 eval "\$process_patterns = \\&${class}::$opts{'style'}::process_patterns";
162 8         618 eval "\$process_hyphenation = \\&${class}::$opts{'style'}::process_hyphenation";
163 8         605 eval "\$leftmin = \$${class}::$opts{'style'}::LEFTMIN";
164 8         651 eval "\$rightmin = \$${class}::$opts{'style'}::RIGHTMIN";
165             } else {
166 0         0 $errstr = "Error loading style module $class::$opts{'style'}: $@";
167 0         0 return;
168             }
169             }
170 8 50       49 $leftmin = $opts{leftmin} if exists $opts{leftmin};
171 8 100       30 $rightmin = $opts{rightmin} if exists $opts{rightmin};
172              
173 8         17 my ($in_patterns, $in_hyphenation) = (0, 0);
174 8         18 my $i = 0;
175 8   100     155 while ((defined $file and defined($_ = ))
      100        
      66        
176             or (not defined $file and defined($_ = $DATA[$i++]))) {
177 22347         34156 s/\%.*$//; # comment out
178 22347         28124 chomp;
179 22347 100       38947 if ($in_patterns) {
    100          
    100          
    100          
180 22254         53246 $in_patterns = $process_patterns->($_,
181             $bothhyphen, $beginhyphen,
182             $endhyphen, $hyphen);
183             } elsif ($in_hyphenation) {
184 75         171 $in_hyphenation = $process_hyphenation->($_, $exception);
185             } elsif (/\\patterns\{/) { # find the \patterns section
186 8         73 $in_patterns = 1;
187             } elsif (/\\hyphenation\{/) {
188 5         41 $in_hyphenation = 1;
189             }
190             }
191 8 100       87 close FILE if defined $file;
192 8         108 $self->{hyphen} = $hyphen;
193 8         20 $self->{begin} = $beginhyphen;
194 8         32 $self->{end} = $endhyphen;
195 8         14 $self->{both} = $bothhyphen;
196 8         17 $self->{exception} = $exception;
197 8 0       61 print STDERR 'Statistics for ', (defined $file ? $file : 'hyphen.tex'),
    50          
198             ': all ' , scalar %$hyphen,
199             ' (', scalar keys %$hyphen,
200             '), exception ', scalar %$exception,
201             ' (', scalar keys %$exception,
202             "),\n\tbegin ", scalar %$beginhyphen,
203             ' (', scalar keys %$beginhyphen,
204             '), end ', scalar %$endhyphen,
205             ' (', scalar keys %$endhyphen,
206             '), both ', scalar %$bothhyphen,
207             ' (', scalar keys %$bothhyphen, ")\n" if $DEBUG;
208            
209 8         101 $self->{exact} = { %$exception };
210 8         24 $self->{leftmin} = $leftmin;
211 8         25 $self->{rightmin} = $rightmin;
212 8         134 $self;
213             }
214              
215             # ############################################
216             # For given word finds places for hyphenation.
217             # Returns an array specifying the places.
218             #
219             sub hyphenate {
220 1390     1390 0 6083 my ($self, $word) = (shift, shift);
221              
222 1390 50       2045 print STDERR "Hyphenate `$word'\n" if $DEBUG;
223            
224 1390         1353 my $exact = $self->{exact};
225 1390 100       2087 if (defined(my $res = $exact->{$word})) {
226 3 50       7 print STDERR "Exact match $res\n" if $DEBUG;
227 3         7 return $self->make_result_list($res);
228             }
229              
230 1387         1078 my $hyphen = $self->{hyphen};
231 1387         1049 my $beginhyphen = $self->{begin};
232 1387         1073 my $endhyphen = $self->{end};
233 1387         1017 my $bothhyphen = $self->{both};
234              
235 1387         1135 my $totallength = length $word;
236 1387         2132 my @result = (0) x ($totallength + 1);
237              
238             # walk the word
239 1387         1286 my $rightstop = $totallength - $self->{rightmin};
240 1387         885 my $pos;
241 1387         2416 for ($pos = 0; $pos <= $rightstop; $pos++) {
242             # length of the rest of the word
243 4471         3732 my $restlength = $totallength - $pos;
244             # length of a substring
245 4471         2907 my $length;
246 4471         6753 for ($length = 1; $length <= $restlength; $length++) {
247 20257         18528 my $substr = substr $word, $pos, $length;
248 20257         13621 my $value;
249             my $j;
250 0         0 my $letter;
251 20257 100       32310 if (defined($value = $hyphen->{$substr})) {
252 2429         1861 $j = $pos;
253 2429 50       3210 print STDERR "$j: $substr: $value\n" if $DEBUG > 2;
254 2429         5713 while ($value =~ /(.)/gs) {
255 7053 100       12674 $result[$j] = $1 if ($1 > $result[$j]);
256 7053         12147 $j++;
257             }
258             }
259 20257 100 100     39781 if (($pos == 0) and
260             defined($value = $beginhyphen->{$substr})) {
261 236         205 $j = 0;
262 236 50       383 print STDERR "$j: .$substr: $value\n" if $DEBUG > 2;
263 236         547 while ($value =~ /(.)/gs) {
264 815 100       1536 $result[$j] = $1 if ($1 > $result[$j]);
265 815         1485 $j++;
266             }
267             }
268 20257 100 100     60852 if (($restlength == $length) and
269             defined($value = $endhyphen->{$substr})) {
270 223         187 $j = $pos;
271 223 50       314 print STDERR "$j: $substr.: $value\n" if $DEBUG > 2;
272 223         541 while ($value =~ /(.)/gs) {
273 479 100       933 $result[$j] = $1 if ($1 > $result[$j]);
274 479         1290 $j++;
275             }
276             }
277             }
278             }
279 1387         1052 my $value;
280             my $letter;
281 1387 50       2098 if (defined($value = $bothhyphen->{$word})) {
282 0         0 my $j = 0;
283 0 0       0 print STDERR "$j: .$word.: $value\n" if $DEBUG > 2;
284 0         0 while ($value =~ /(.)/gs) {
285 0 0       0 $result[$j] = $1 if ($1 > $result[$j]);
286 0         0 $j++;
287             }
288             }
289              
290 1387         2598 my $result = join '', @result;
291             ### substr($result, 0, $self->{leftmin} + 1) = '0' x ($self->{leftmin} + 1);
292 1387         1883 substr($result, 0, $self->{leftmin}) = '0' x $self->{leftmin};
293 1387         1423 substr($result, -$self->{rightmin}) = '0' x $self->{rightmin};
294              
295 1387 50       1933 print STDERR "Result: $result\n" if $DEBUG;
296 1387         1819 return $self->make_result_list($result);
297             }
298              
299             # ####################
300             #
301             #
302             sub make_result_list {
303 1390     1390 0 1449 my ($self, $result) = @_;
304 1390         1266 my @result = ();
305 1390         1040 my $i = 0;
306 1390         3440 while ($result =~ /(.)/g) {
307 7280 100       11812 push @result, $i if (int($1) % 2);
308 7280         11601 $i++;
309             }
310 1390         3282 @result;
311             }
312              
313             # #########################################
314             # For a word show the result of hyphenation
315             #
316             sub visualize {
317 17     17 0 1542 my ($self, $word) = (shift, shift);
318 17         24 my $number = 0;
319 17         24 my $pos;
320 17         43 for $pos ($self->hyphenate($word)) {
321 29         70 substr($word, $pos + $number, 0) = "-";
322 29         50 $number++;
323             }
324 17         63 $word;
325             }
326              
327             =head1 VERSION
328              
329             1.18
330              
331             =head1 AVAILABLE FROM
332              
333             http://www.adelton.com/perl/TeX-Hyphen/
334              
335             =head1 AUTHOR
336              
337             (c) 1997--2016 Jan Pazdziora.
338              
339             All rights reserved. This package is free software; you can
340             redistribute it and/or modify it under the same terms as Perl itself.
341              
342             Contact the author at jpx dash perl at adelton dot com.
343              
344             =head1 SEE ALSO
345              
346             perl(1), TeX::Hyphen::czech.
347              
348             =cut
349              
350             1;
351              
352             __DATA__