File Coverage

blib/lib/Text/Reflow.pm
Criterion Covered Total %
statement 234 298 78.5
branch 97 160 60.6
condition 34 70 48.5
subroutine 21 22 95.4
pod 0 12 0.0
total 386 562 68.6


line stmt bran cond sub pod time code
1             package Text::Reflow;
2              
3             require 5.005_62;
4 1     1   23681 use strict;
  1         5  
  1         78  
5 1     1   14 use warnings;
  1         3  
  1         88  
6 1     1   1660 use integer;
  1         36  
  1         11  
7 1     1   83 use Carp;
  1         2  
  1         1537  
8              
9             require Exporter;
10             require DynaLoader;
11              
12             our @ISA = qw(Exporter DynaLoader);
13              
14             # Original script written by Michael Larsen, larsen@edu.upenn.math
15             # Modified by Martin Ward, martin@gkc.org.uk
16             # Copyright 1994 Michael Larsen and Martin Ward
17             # Email: martin@gkc.org.uk
18             #
19             # This program is free software; you can redistribute it and/or modify
20             # it under the terms of either the Artistic License or
21             # the GNU General Public License as published by
22             # the Free Software Foundation; either version 3 of the License, or
23             # (at your option) any later version.
24             #
25             # This program is distributed in the hope that it will be useful,
26             # but WITHOUT ANY WARRANTY; without even the implied warranty of
27             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28             # GNU General Public License for more details.
29             #
30              
31             # Items to export into callers namespace by default. Note: do not export
32             # names by default without a very good reason. Use EXPORT_OK instead.
33             # Do not simply export all your public functions/methods/constants.
34              
35             # This allows declaration use Text::Reflow ':all';
36             # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
37             # will save memory.
38              
39             our %EXPORT_TAGS = ( 'all' => [ qw(
40             reflow_file
41             reflow_string
42             reflow_array
43             ) ] );
44              
45             our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
46              
47             our @EXPORT = qw(
48            
49             );
50              
51             $Text::Reflow::VERSION = "1.16";
52              
53             bootstrap Text::Reflow $Text::Reflow::VERSION;
54              
55             # Preloaded methods go here.
56              
57             # This is the perl version of the C function reflow_trial
58             # If the C XSUB doesn't work, comment out the line
59             # bootstrap Text::Reflow $VERSION;
60             # above, and take the _ from the front of this perl version:
61              
62             sub _reflow_trial($$$$$$$$$$) {
63 0     0   0 my ($optimum, $maximum, $wordcount,
64             $penaltylimit, $semantic, $shortlast,
65             $word_len, $space_len, $extra, $result) = @_;
66 0         0 my ($lastbreak, @linkbreak);
67 0         0 my ($j, $k, $interval, $penalty, @totalpenalty, $bestsofar);
68 0         0 my (@best_linkbreak, $best_lastbreak, $opt);
69 0         0 my @optimum = unpack("N*", pack("H*", $optimum));
70 0         0 my @word_len = unpack("N*", pack("H*", $word_len));
71 0         0 my @space_len = unpack("N*", pack("H*", $space_len));
72 0         0 my @extra = unpack("N*", pack("H*", $extra));
73 0         0 my $best = $penaltylimit * 21;
74 0         0 foreach $opt (@optimum) {
75 0         0 @linkbreak = ();
76 0         0 for ($j = 0; $j < $wordcount; $j++) { # Optimize preceding break
77 0         0 $interval = 0;
78 0         0 $totalpenalty[$j] = $penaltylimit * 2;
79 0         0 for ($k = $j; $k >= 0; $k--) {
80 0         0 $interval += $word_len[$k];
81 0 0 0     0 last if (($k < $j) && (($interval > $opt + 10)
      0        
82             || ($interval >= $maximum)));
83 0         0 $penalty = ($interval - $opt) * ($interval - $opt);
84 0         0 $interval += $space_len[$k];
85 0 0       0 $penalty += $totalpenalty[$k-1] if ($k > 0);
86 0         0 $penalty -= ($extra[$j] * $semantic)/2;
87 0 0       0 if ($penalty < $totalpenalty[$j]) {
88 0         0 $totalpenalty[$j] = $penalty;
89 0         0 $linkbreak[$j] = $k-1;
90             }
91             }
92             }
93 0         0 $interval = 0;
94 0         0 $bestsofar = $penaltylimit * 20;
95 0         0 $lastbreak = $wordcount-2;
96             # Pick a break for the last line which gives
97             # the least penalties for previous lines:
98 0         0 for ($k = $wordcount-2; $k >= -1; $k--) { # Break after k?
99 0         0 $interval += $word_len[$k+1];
100 0 0 0     0 last if (($interval > $opt + 10) || ($interval > $maximum));
101 0 0       0 if ($interval > $opt) { # Don't make last line too long
102 0         0 $penalty = ($interval - $opt) * ($interval - $opt);
103             } else {
104 0         0 $penalty = 0;
105             }
106 0         0 $interval += $space_len[$k+1];
107 0 0       0 $penalty += $totalpenalty[$k] if ($k >= 0);
108 0 0       0 $penalty += $shortlast * $semantic if ($wordcount - $k - 1 <= 2);
109 0 0       0 if ($penalty <= $bestsofar) {
110 0         0 $bestsofar = $penalty;
111 0         0 $lastbreak = $k;
112             }
113             }
114             # Save these breaks if they are an improvement:
115 0 0       0 if ($bestsofar < $best) {
116 0         0 $best_lastbreak = $lastbreak;
117 0         0 @best_linkbreak = @linkbreak;
118 0         0 $best = $bestsofar;
119             }
120             } # Next $opt
121             # Return the best breaks:
122 0         0 $result = unpack("H*", pack("N*", ($best_lastbreak, @best_linkbreak)));
123 0         0 return($result);
124             }
125              
126              
127 1         1950 use vars qw(
128             $IO_Files $lastbreak $poetryindent %abbrev @output
129             $connpenalty $maximum $quote %connectives @save_opts
130             $dependent $namebreak $semantic %keys @space_len
131             $frenchspacing $noreflow $sentence @extra @tmp
132             $indent $oneparagraph $shortlast @from @to
133             $indent1 $optimum $skipindented @linewords @word_len
134             $indent2 $penaltylimit $skipto @linkbreak @words
135             $independent $pin $wordcount @optimum
136 1     1   16 );
  1         3  
137              
138             # The following parameters can be twiddled to taste:
139              
140             %keys = (optimum => '.*', maximum => '\d+',
141             indent => '.*', indent1 => '.*', indent2 => '.*',
142             quote => '.*',
143             skipto => '.*', skipindented => '[012]', oneparagraph => '[yYnN]',
144             frenchspacing => '[yYnN]',
145             noreflow => '.*',
146             semantic => '\d+', namebreak => '\d+',
147             sentence => '\d+', independent => '\d+', dependent => '\d+',
148             shortlast => '\d+', connpenalty => '\d+',
149             poetryindent => '\d+');
150            
151              
152             $optimum = [65]; # Best line length 65. Also try [60..70]
153             $maximum = 75; # Maximum possible line length 80
154             $indent1 = ""; # Indentation for first line
155             $indent2 = ""; # Indentation for each line after the first
156             $quote = ""; # Quote characters to remove from the front of each line
157             $skipto = ""; # Pattern to skip to before starting to reflow
158             $skipindented = 2; # Number of sequential indented lines required
159             # before the group of lines will be skipped
160             $noreflow = ""; # A regexp to indicate lines which should not be reflowed
161             # eg table of contents: '\.\s*\.\s*\.\s*\.\s*\.'
162             $frenchspacing = "n"; # If "y" then don't put two spaces at end of sentence/clause
163             $oneparagraph = "n"; # If "Y" then put all the input into a single paragraph
164              
165             $semantic = 30; # Extent to which semantic factors matter 20
166             $namebreak = 20; # Penalty for splitting up name 10
167             $sentence = 20; # Penalty for sentence widows and orphans 6
168             $independent = 10; # Penalty for independent clause w's & o's
169             $dependent = 6; # Penalty for dependent clause w's & o's
170             $shortlast = 5; # Penalty for a short last line (1 or 2 words) in a paragraph
171             $connpenalty = 1; # Multiplier to avoid penalties at end of line
172             $poetryindent = 1; # Treat $skipindented consecutive lines indented by
173             # at least this much as poetry
174              
175             $penaltylimit = 0x2000000;
176             @save_opts = (); # Saved original values of options
177              
178             $pin = " " x $poetryindent;
179              
180             # NB By default there must be two consecutive indented lines for it to count
181             # as poetry, so the program will not mistake a paragraph indentation
182             # for a line of poetry.
183              
184              
185             # Abbreviations from a half dozen novels,
186             # Titles and other abbreviations which should discourage
187             # a break have the value 1:
188              
189             %abbrev = (
190             Jan => 1, Feb => 1, Mar => 1, Apr => 1, Jun => 1, Jul => 1,
191             Aug => 1, Sep => 1, Sept => 1, Oct => 1, Nov => 1, Dec => 1,
192             Pvt => 1, Cpl => 1, Sgt => 1, Ens => 1, Lieut => 1, Capt => 1, Cmdr => 1,
193             Maj => 1, Col => 1, Gen => 1, Adm => 1,
194             Dr => 1, Hon => 1, Mlle => 1, Mme => 1, Mr => 1, Mrs => 1, Miss => 1,
195             Prof => 1, Rev => 1,
196             Bart => 2, Esq => 2, etc => 2,
197             No => 1, St => 1,
198             Ave => 2, Rd => 2, Blvd => 2, Ct => 2, Cir => 2,
199             A => 1, B => 1, C => 1, D => 1, E => 1, F => 1, G => 1, H => 1,
200             I => 1, J => 1, K => 1, L => 1, M => 1, N => 1, O => 1, P => 1, Q => 1,
201             R => 1, S => 1, T => 1, U => 1, V => 1, W => 1, X => 1, Y => 1, Z => 1);
202              
203             # The value is the relative effort to avoid breaking
204             # a line after this connective
205              
206             %connectives = ( # Extracted from /usr/dict/connectives
207             the => 4, he => 4,
208             of => 2, and => 2, to => 2, a => 2,
209             in => 2, that => 2, is => 1, was => 1,
210             for => 2, with => 2, as => 2, his => 1,
211             on => 1, be => 1, at => 1, by => 2,
212             had => 1, not => 1, are => 1, but => 2, from => 1,
213             or => 2, have => 1, an => 2, which => 2,
214             one => 1, were => 1, her => 1, all => 1, their => 1,
215             when => 2, who => 2, will => 1, more => 1, no => 1,
216             if => 2, out => 1, so => 2, what => 2, its => 1,
217             about => 1, into => 1, than => 1,
218             only => 1, other => 1, new => 1, some => 1,
219             these => 2, two => 1, may => 1,
220             do => 1, first => 1, any => 1, my => 1, now => 1,
221             such => 1, like => 2, our => 1, over => 1,
222             even => 1, most => 1, after => 1, also => 2,
223             many => 1, before => 1, through => 1, where => 2, your => 1,
224             well => 1, down => 1, should => 1, because => 2,
225             each => 1, just => 1, those => 2, how => 2, too => 1,
226             good => 1, very => 2, here => 1, between => 1, both => 1,
227             under => 1, never => 1, same => 1, another => 1,
228             while => 2, last => 1, might => 1, great => 1,
229             since => 2, against => 1, right => 1, three => 2, next => 2);
230              
231              
232             sub reflow_file($$@) {
233 4     4 0 3197 my ($from, $to, @opts) = @_;
234 4         25 local $IO_Files = 1; # We are reading/writing files
235 4 50       19 $from = \*STDIN if ($from eq "");
236 4 50       14 $to = \*STDOUT if ($to eq "");
237 4 50 0     20 my $from_a_handle = (ref($from)
238             ? (ref($from) eq 'GLOB'
239             || UNIVERSAL::isa($from, 'GLOB')
240             || UNIVERSAL::isa($from, 'IO::Handle'))
241             : (ref(\$from) eq 'GLOB'));
242 4 50 0     28 my $to_a_handle = (ref($to)
243             ? (ref($to) eq 'GLOB'
244             || UNIVERSAL::isa($to, 'GLOB')
245             || UNIVERSAL::isa($to, 'IO::Handle'))
246             : (ref(\$to) eq 'GLOB'));
247 4         9 my $closefrom = 0;
248 4         8 my $closeto = 0;
249 4         57 local(*FROM, *TO);
250              
251 4 50       13 if ($from_a_handle) {
252             {
253 1     1   15 no warnings;
  1         3  
  1         423  
  0         0  
254 0         0 *FROM = *$from{FILEHANDLE};
255             }
256             } else {
257 4 50       25 $from = "./$from" if $from =~ /^\s/s;
258 4 50       158 open(FROM, "< $from\0") or croak "Cannot read `$from': $!";
259 4 50       22 binmode FROM or die "($!,$^E)";
260 4         8 $closefrom = 1;
261             }
262              
263 4 50       10 if ($to_a_handle) {
264             {
265 1     1   14 no warnings;
  1         3  
  1         1300  
  0         0  
266 0         0 *TO = *$to{FILEHANDLE};
267             }
268             } else {
269 4 50       17 $to = "./$to" if $to =~ /^\s/s;
270 4 50       293 open(TO,"> $to\0") or croak "Cannot write to `$to': $!";
271 4 50       19 binmode TO or die "($!,$^E)";
272 4         9 $closeto = 1;
273             }
274              
275 4         16 process_opts(@opts);
276 4         24 reflow();
277 4         16 restore_opts();
278              
279 4 50 50     547 close(TO) || croak("Cannot close `$to': $!") if ($closeto);
280 4 50 50     98 close(FROM) || croak("Cannot close `$from': $!") if ($closefrom);
281             }
282              
283              
284             sub reflow_string($@) {
285 11     11 0 9019 my ($input, @opts) = @_;
286 11         25 local $IO_Files = 0; # We are reading/writing arrays
287             # Create the array from the string, keep trailing empty lines.
288             # We split on newlines and then restore them, being careful
289             # not to add an extra newline at the end:
290 11         257 local @from = split(/\n/, $input, -1);
291 11 50       103 pop(@from) if ($from[$#from] eq "");
292 11         47 @from = map { "$_\n" } @from;
  356         923  
293 11         61 local @to = ();
294 11         47 process_opts(@opts);
295 11         34 reflow();
296 11         40 restore_opts();
297 11         255 return(join("", @to));
298             }
299              
300              
301             sub reflow_array($@) {
302 4     4 0 3451 my ($input, @opts) = @_;
303 4         11 local $IO_Files = 0; # We are reading/writing arrays
304 4         38 local @from = @$input;
305 4         10 local @to = ();
306 4         17 process_opts(@opts);
307 4         10 reflow();
308 4         14 restore_opts();
309 4         26 return(\@to);
310             }
311              
312              
313              
314             # Process the keyword options, set module global variables as required,
315             # save the old values on the @save_opts stack:
316              
317             sub process_opts(@) {
318 19     19 0 144 my @opts = @_;
319 19         30 my ($key, $value);
320 1     1   15 no strict 'refs';
  1         15  
  1         975  
321             # Fix an externally-set $optimum value:
322 19 50       162 $optimum = [$optimum] if ($optimum =~ /^\d+$/);
323 19         69 while (@opts) {
324 60         100 $key = shift(@opts);
325 60 50       191 croak "No value for option key `$key'" unless (@opts);
326 60         83 $value = shift(@opts);
327 60 50       218 croak "`$key' is not a valid option" unless ($keys{$key});
328 60 50       1391 croak "`$value' is not a suitable value for `$key'"
329             unless ($value =~ /^$keys{$key}$/);
330             # keyword "indent" is short for setting both indent1 and indent2:
331 60 100       275 if ($key eq "indent") {
    100          
332 1         4 $key = "indent1";
333 1         5 unshift(@opts, "indent2", $value);
334             } elsif ($key eq "optimum") {
335 14 50       113 if ($value =~ /^\d+$/) {
    50          
336 0         0 $value = [$value];
337             } elsif (ref($value) ne 'ARRAY') {
338 0         0 croak "`$value' is not a suitable value for `$key'";
339             }
340             }
341             # Save old value. Save a copy of the array if the value is a reference:
342 60 100       105 if (ref(${$key}) eq "ARRAY") {
  60         232  
343 14         25 push(@save_opts, $key, [@${$key}]);
  14         54  
344             } else {
345 46         107 push(@save_opts, $key, ${$key});
  46         113  
346             }
347 60         87 ${$key} = $value;
  60         286  
348             }
349             # Adjust $optimum and $maximum by $indent2 length:
350 19 100       103 if ($indent2 ne "") {
351 3         14 push(@save_opts, "optimum", $optimum, "maximum", $maximum);
352 3         7 $maximum -= length($indent2);
353 3         10 $optimum = [map { $_ - length($indent2) } @$optimum];
  7         20  
354             }
355             }
356              
357              
358             sub restore_opts() {
359 19     19 0 44 my ($key, $value);
360 1     1   15 no strict 'refs';
  1         3  
  1         6165  
361 19         83 while (@save_opts) {
362 66         100 $value = pop(@save_opts);
363 66         103 $key = pop(@save_opts);
364 66         109 ${$key} = $value;
  66         275  
365             }
366             }
367              
368              
369             sub get_line() {
370 655     655 0 790 my $line;
371 655 100       1190 if ($IO_Files) {
372 144         561 $line = <FROM>;
373             } else {
374 511         806 $line = shift(@from);
375             }
376 655 100       1782 return($line) unless defined($line);
377 636         1125 $line =~ tr/\015\032//d;
378 636         3062 $line =~ s/^$quote//;
379             # Check for eg $quote = "> " and $line = ">":
380 636         1061 my $quote_ns = $quote;
381 636 100       1620 if ($quote_ns =~ s/\s+$//) {
382 39 100       194 $line = "" if ($line =~ /^$quote_ns$/);
383             }
384 636         2074 return($line);
385             }
386              
387              
388             # Trim EOL spaces and print the lines:
389             sub print_lines(@) {
390 289     289 0 610 my @lines = @_;
391 289         440 map { s/[ \t]+\n/\n/gs } @lines;
  656         2850  
392 289 100       574 if ($IO_Files) {
393 69         324 print TO @lines;
394             } else {
395 220         912 push(@to, @lines)
396             }
397             }
398              
399              
400             sub reflow() {
401 19     19 0 42 my ($line, $last);
402 19 100       60 if ($skipto ne "") {
403 4         11 while (defined($line = get_line())) {
404 58         91 print_lines($line);
405 58 100       279 last if ($line =~ /^$skipto/);
406             }
407 4 50       10 croak "Skipto pattern `$skipto' not found!" unless (defined($line));
408             }
409              
410 19 100       87 if ($oneparagraph =~ /[Yy]/) {
    100          
411             # put all the lines into one paragraph
412 1         6 while (defined($line = get_line())) {
413 35         61 process($line);
414             }
415              
416             } elsif ($skipindented < 2) {
417 3         8 while (defined($line = get_line())) {
418 105 100 66     1131 if (($skipindented && ($line =~ /^($pin|\t).*\S/))
      33        
      66        
419             || (($noreflow ne "") && ($line =~ /$noreflow/))) {
420             # current line is indented, or a paragraph break:
421 21         42 reflow_para();
422 21         54 print_lines($indent1 . $line);
423             } else {
424             # Add line to current paragraph in @words:
425 84         179 process($line);
426             }
427             }
428              
429             } else {
430              
431 15         34 while (defined($line = get_line())) {
432 369 50 33     2583 if (($noreflow ne "") && ($line =~ /$noreflow/)) {
    100          
433             # current line is a paragraph break:
434 0         0 reflow_para();
435 0         0 print_lines($indent1 . $line);
436 0         0 next;
437             } elsif ($line =~ /^($pin|\t).*\S/) {
438             # current line may be poetry, check next line:
439 39         49 $last = $line;
440 39         73 $line = get_line();
441 39 50       90 if (!defined($line)) {
442 0         0 process($last);
443 0         0 last;
444             }
445 39 100       194 if ($line =~ /^($pin|\t).*\S/) {
446             # found some poetry, skip indented lines until end of input
447             # or a non-indented line found:
448 10         18 reflow_para();
449 10         29 print_lines($indent1 . $last);
450 10         45 print_lines($indent1 . $line);
451 10         31 while (defined($line = get_line())) {
452 30 50 33     365 last unless (($line =~ /^($pin|\t).*\S/)
      66        
453             || ($noreflow ne "" && $line =~ /$noreflow/));
454 20         57 print_lines($indent1 . $line);
455             }
456 10 50       29 last unless (defined($line)); # poetry at end of document
457             # $line is a non-poetic line
458             } else {
459             # $last had a poetry indent, but current line doesn't.
460             # Process last line:
461 29         76 process($last);
462             }
463             } # end of first poetry test
464             # current line is non-poetic, so process it:
465 369         715 process($line);
466             }
467             }
468             # reflow any remaining @words:
469 19         54 reflow_para();
470             }
471              
472              
473             # Process a non-poetry line by pushing the words onto @words
474             # If the line is blank, then reflow the paragraph of @words:
475              
476             sub process($) {
477 517     517 0 915 my ($line) = @_;
478             # current line is non-poetry
479             # remove spaces around dashes:
480 517         1605 $line =~ s/([^-])[ \t]*--[ \t]*([^-])/$1--$2/g;
481             # protect ". . ." ellipses:
482 517         743 $line =~ s/ \. \. \./\x9F\.\x9F\.\x9F\./g;
483 517         683 $line =~ s/\. \. \./\.\x9F\.\x9F\./g;
484 517         4756 @linewords = split(/\s+/, $line);
485 517 100 100     2811 shift(@linewords) if (@linewords && ($linewords[0] eq ""));
486             # If last word of previous line ends in a single hyphen,
487             # then append first word of this line:
488 517 50 100     3387 if (@linewords && @words && ($words[$#words] =~ /[a-zA-Z0-9]-$/)) {
      66        
489 0         0 $words[$#words] .= shift(@linewords);
490             }
491 517 100       1203 if ($#linewords == -1) {
492             # No words on this line
493 109 100       317 if ($oneparagraph !~ /[Yy]/) {
494             # end of paragraph
495 102         231 reflow_para();
496 102         312 print_lines("$indent1\n");
497             }
498             } else {
499             # add @linewords to @words,
500             # split on em dashes, ie word--word
501             # Move "--" from beginning of current word to end of last word:
502 408 50 66     1917 if (($#words >= 0) && ($linewords[0] =~ s/^--[^a-zA-Z0-9]*//)) {
503 0         0 $words[$#words] .= $&;
504 0 0       0 shift(@linewords) if ($linewords[0] eq "");
505             }
506 408         512 my $word;
507 408         826 foreach $word (@linewords) {
508 4545 100       8042 if ($word =~ /[^-]--[a-zA-Z0-9]/) {
509 65         210 @tmp = split(/--/, $word);
510             # restore the hyphens:
511 65         382 grep(s/$/--/, @tmp);
512             # remove an extra one at the end:
513 65         248 $tmp[$#tmp] =~ s/--$//;
514             # append @tmp to @words:
515 65         247 push (@words, @tmp);
516             } else {
517             # append $word to @words:
518 4480         8371 push (@words, $word);
519             }
520             }
521             }
522             }
523              
524              
525             sub reflow_para {
526 152 100   152 0 460 return() unless (@words);
527 68         156 reflow_penalties();
528 68         90 $lastbreak = 0;
529 68         135 $linkbreak[$wordcount] = 0;
530             # Create space for the result:
531 68         342 my $result = " " x (($wordcount + 2) * 8);
532 68         11402 $result = reflow_trial(unpack("H*", pack("N*", @$optimum)),
533             $maximum, $wordcount,
534             $penaltylimit, $semantic, $shortlast,
535             unpack("H*", pack("N*", @word_len)),
536             unpack("H*", pack("N*", @space_len)),
537             unpack("H*", pack("N*", @extra)),
538             $result);
539 68         1267 @linkbreak = unpack("N*", pack("H*", $result));
540             # Convert @linkbreak from unsigned to signed:
541 68 100       267 @linkbreak = map { $_ > 0xF0000000 ? -((0xFFFFFFFF - $_) + 1) : $_ + 0 } @linkbreak;
  4678         9272  
542 68         360 $lastbreak = shift(@linkbreak);
543 68         166 compute_output();
544 68         373 grep (s/\x9F/ /g, @output);
545 68         244 print_lines(@output);
546 68         605 @words = ();
547             }
548              
549              
550             # Add spaces to ends of sentences and calculate @extra array of penalties
551             sub reflow_penalties {
552 68     68 0 78 my $j;
553 68         162 $wordcount = $#words + 1;
554             # Add paragraph indentation to first word:
555 68 50       233 $words[0] = $indent1 . $words[0] if ($wordcount);
556 68         274 for ($j = 0; $j < $wordcount+1; $j++) {
557 4678         10433 $extra[$j] = 0;
558             }
559 68         159 for ($j = 0; $j < $wordcount; $j++) {
560 4610 100       17728 if ($words[$j] =~ /^([A-Za-z0-9-]+)["')]*([\.\:])["')]*$/) { # Period or colon
561 187 50 33     946 if (!defined($abbrev{$1}) || ($2 eq ":")) { # End of sentence
562 187         279 $extra[$j] += $sentence / 2;
563 187 50       491 $extra[$j-1] -= $sentence if ($j > 0);
564 187         276 $extra[$j+1] -= $sentence;
565 187 100       899 $words[$j] = $words[$j] . " " unless ($frenchspacing =~ /[Yy]/);
566             } else{
567             # Don't break "Mr. X"
568 0 0       0 $extra[$j] -= $namebreak if ($abbrev{$1} == 1);
569             }
570             }
571 4610 0 0     11297 if (($words[$j] =~ /[\?\!]["')]*$/) # !? after word
      33        
572             && (($j >= $#words) || ($words[$j+1] =~ /^[^a-zA-Z]*[A-Z]/))) {
573 0         0 $extra[$j] += $sentence / 2;
574 0 0       0 $extra[$j-1] -= $sentence if ($j > 0);
575 0         0 $extra[$j+1] -= $sentence;
576 0 0       0 $words[$j] = $words[$j] . " " unless ($frenchspacing =~ /[Yy]/);
577             }
578 4610 100       10018 if ($words[$j] =~ /\,$/) { # Comma after word
579 246         416 $extra[$j] += $dependent / 2;
580 246 50       664 $extra[$j-1] -= $dependent if ($j > 0);
581 246         335 $extra[$j+1] -= $dependent;
582             }
583 4610 100       15990 if ($words[$j] =~ /[\;\"\'\)]$|--$/) { # Punctuation after word
584 178         278 $extra[$j] += $independent / 2;
585 178 50       542 $extra[$j-1] -= $independent if ($j > 0);
586 178         275 $extra[$j+1] -= $independent;
587             }
588 4610 100 100     23535 if (($j < $#words)
589             && ($words[$j+1] =~ /^\(/)) { # Next word has opening parenthesis
590 1         3 $extra[$j] += $independent / 2;
591 1 50       7 $extra[$j-1] -= $independent if ($j > 0);
592 1         3 $extra[$j+1] -= $independent;
593             }
594 4610 100 100     23791 if (($j < $#words)
      100        
      66        
595             && ($words[$j] =~ /[A-Z]/ && $words[$j] !~ /\./
596             && $words[$j+1] =~ /[A-Z]/)) {
597 78         118 $extra[$j] -= $namebreak; # Don't break "United States"
598             }
599             $extra[$j] -= $connectives{$words[$j]} * $connpenalty
600 4610 100       18563 if (defined($connectives{$words[$j]}));
601             }
602              
603 68         318 @word_len = (); # Length of each word (excluding spaces)
604 68         260 @space_len = (); # Length the space after this word
605 68         238 for ($j = 0; $j < $wordcount; $j++) {
606 4610 100       12150 if ($words[$j] =~ /--$/) {
    100          
607 66         105 $word_len[$j] = length($words[$j]);
608 66         177 $space_len[$j] = 0;
609             } elsif ($words[$j] =~ / $/) {
610 176         262 $word_len[$j] = length($words[$j]) - 1;
611 176         475 $space_len[$j] = 2;
612             } else {
613 4368         5736 $word_len[$j] = length($words[$j]);
614 4368         10652 $space_len[$j] = 1;
615             }
616             }
617             # First word already has $indent1 added and will not be indented further:
618 68 50       249 $word_len[0] -= length($indent2) if ($wordcount);
619             }
620              
621              
622             # compute @output from $wordcount, @words, $lastbreak and @linkbreak
623              
624             sub compute_output {
625 68     68 0 83 my ($j, $terminus);
626 68         233 @output = ();
627 68         89 $terminus = $wordcount-1;
628 68         231 for ($j = 0; $terminus >= 0; $j++) {
629 435         1978 $output[$j] = join(' ', @words[$lastbreak+1..$terminus])."\n";
630             #print "j = $j, lastbreak = $lastbreak:\noutput = $output[$j]\n";
631 435         725 $terminus = $lastbreak;
632 435         1152 $lastbreak = $linkbreak[$lastbreak];
633             }
634 68         120 @output = reverse(@output);
635             # trim spaces after hyphens:
636 68         140 map { s/([^-])[ \t]*--[ \t*]([^-])/$1--$2/g } @output;
  435         1628  
637             # Add the indent to all but the first line:
638 68         205 map { $_ = $indent2 . $_ } @output[1..$#output];
  367         779  
639             }
640              
641              
642              
643              
644              
645             1;
646             __END__