File Coverage

blib/lib/Text/KnuthPlass.pm
Criterion Covered Total %
statement 122 169 72.1
branch 15 40 37.5
condition 8 30 26.6
subroutine 27 35 77.1
pod 7 7 100.0
total 179 281 63.7


line stmt bran cond sub pod time code
1             package Text::KnuthPlass;
2             require XSLoader;
3 3     3   214839 use constant DEBUG => 0;
  3         8  
  3         353  
4 3     3   23 use warnings;
  3         5  
  3         108  
5 3     3   16 use strict;
  3         11  
  3         222  
6              
7             our $VERSION = '1.02';
8             eval { XSLoader::load("Text::KnuthPlass", $VERSION); } or die $@;
9             # Or else there's a Perl version
10 3     3   3774 use Data::Dumper;
  3         40206  
  3         264  
11              
12             package Text::KnuthPlass::Element;
13 3     3   87 use base 'Class::Accessor';
  3         8  
  3         3516  
14             __PACKAGE__->mk_accessors("width");
15 174     174   347 sub new { my $self = shift; bless { width => 0, @_ }, $self }
  174         1022  
16 34     34   1747 sub is_penalty { return shift->isa("Text::KnuthPlass::Penalty") }
17 0     0   0 sub is_glue { return shift->isa("Text::KnuthPlass::Glue") }
18              
19             package Text::KnuthPlass::Box;
20 3     3   9824 use base 'Text::KnuthPlass::Element';
  3         10  
  3         2183  
21             __PACKAGE__->mk_accessors("value");
22              
23 0     0   0 sub _txt { return "[".$_[0]->value."/".$_[0]->width."]"; }
24              
25              
26             package Text::KnuthPlass::Glue;
27 3     3   20 use base 'Text::KnuthPlass::Element';
  3         7  
  3         1976  
28             __PACKAGE__->mk_accessors("stretch", "shrink");
29              
30 61     61   108 sub new { my $self = shift; $self->SUPER::new(stretch => 0, shrink => 0, @_) }
  61         152  
31 0     0   0 sub _txt { return sprintf "<%.2f+%.2f-%.2f>", $_[0]->width, $_[0]->stretch, $_[0]->shrink; }
32              
33             package Text::KnuthPlass::Penalty;
34 3     3   19 use base 'Text::KnuthPlass::Element';
  3         19  
  3         1988  
35             __PACKAGE__->mk_accessors("penalty", "flagged", "shrink");
36 28     28   244 sub new { my $self = shift; $self->SUPER::new(flagged => 0, shrink => 0, @_) }
  28         110  
37 0   0 0   0 sub _txt { "(".$_[0]->penalty.($_[0]->flagged &&"!").")"; }
38              
39             package Text::KnuthPlass::Breakpoint;
40 3     3   52 use base 'Text::KnuthPlass::Element';
  3         5  
  3         1548  
41             __PACKAGE__->mk_accessors(qw/position demerits ratio line fitnessClass totals previous/);
42              
43             package Text::KnuthPlass::DummyHyphenator;
44 3     3   45 use base 'Class::Accessor';
  3         103  
  3         401  
45 31     31   299 sub hyphenate { return $_[1] }
46              
47             package Text::KnuthPlass;
48 3     3   18 use base 'Class::Accessor';
  3         5  
  3         225  
49 3     3   26 use Carp qw/croak/;
  3         13  
  3         12373  
50              
51             my %defaults = (
52             infinity => 10000,
53             tolerance => 30,
54             hyphenpenalty => 50,
55             demerits => { line => 10, flagged => 100, fitness => 3000 },
56             space => { width => 3, stretch => 6, shrink => 9 },
57             linelengths => [78],
58             measure => sub { length $_[0] },
59             hyphenator =>
60             eval { require Text::Hyphen } ? Text::Hyphen->new() :
61             Text::KnuthPlass::DummyHyphenator->new()
62             );
63             __PACKAGE__->mk_accessors(keys %defaults);
64 4     4 1 171168 sub new { my $self = shift; bless {%defaults, @_}, $self }
  4         51  
65              
66             =head1 NAME
67              
68             Text::KnuthPlass - Breaks paragraphs into lines using the TeX algorithm
69              
70             =head1 SYNOPSIS
71              
72             use Text::KnuthPlass;
73             my $typesetter = Text::KnuthPlass->new();
74             my @lines = $typesetter->typeset($paragraph);
75             ...
76              
77             To use with plain text:
78              
79             for (@lines) {
80             for (@{$_->{nodes}}) {
81             if ($_->isa("Text::KnuthPlass::Box")) { print $_->value }
82             elsif ($_->isa("Text::KnuthPlass::Glue")) { print " " }
83             }
84             if ($_->{nodes}[-1]->is_penalty) { print "-" }
85             print "\n";
86             }
87              
88             To use with PDF::API2:
89              
90             my $text = $page->text;
91             $text->font($font, 12);
92             $text->lead(13.5);
93              
94             my $t = Text::KnuthPlass->new(
95             measure => sub { $text->advancewidth(shift) },
96             linelengths => [235]
97             );
98             my @lines = $t->typeset($paragraph);
99              
100             my $y = 500;
101             for my $line (@lines) {
102             $x = 50;
103             for my $node (@{$line->{nodes}}) {
104             $text->translate($x,$y);
105             if ($node->isa("Text::KnuthPlass::Box")) {
106             $text->text($node->value);
107             $x += $node->width;
108             } elsif ($node->isa("Text::KnuthPlass::Glue")) {
109             $x += $node->width + $line->{ratio} *
110             ($line->{ratio} < 0 ? $node->shrink : $node->stretch);
111             }
112             }
113             if ($line->{nodes}[-1]->is_penalty) { $text->text("-") }
114             $y -= $text->lead();
115             }
116              
117             =head1 METHODS
118              
119             =head2 new
120              
121             The constructor takes a number of options. The most important ones are:
122              
123             =over 3
124              
125             =item measure
126              
127             A subroutine reference to determine the width of a piece of text. This
128             defaults to C, which is what you want if you're
129             typesetting plain monospaced text. You will need to change this to plug
130             into your font metrics if you're doing something graphical.
131              
132             =item linelengths
133              
134             This is an array of line lengths. For instance, C< [30,40,50] > will
135             typeset a triangle-shaped piece of text with three lines. What if the
136             text spills over to more than three lines? In that case, the final value
137             in the array is used for all further lines. So to typeset an ordinary
138             block-shaped column of text, you only need specify an array with one
139             value: the default is C< [78] >.
140              
141             =item tolerance
142              
143             How much leeway we have in leaving wider spaces than the algorithm
144             would prefer.
145              
146             =item hyphenator
147              
148             An object which hyphenates words. If you have C installed
149             (highly recommended) then a C object is instantiated by
150             default; if not, an object of the class
151             C is instantiated - this simply finds
152             no hyphenation points at all. So to turn hyphenation off, set
153              
154             hyphenator => Text::KnuthPlass::DummyHyphenator->new()
155              
156             To typeset non-English text, pass in an object which responds to the
157             C method, returning a list of hyphen positions. (See
158             C for the interface.)
159              
160             =back
161              
162             There are other options for fine-tuning the output. If you know your way
163             around TeX, dig into the source to find out what they are.
164              
165             =head2 typeset
166              
167             This is the main interface to the algorithm, made up of the constituent
168             parts below. It takes a paragraph of text and returns a list of lines if
169             suitable breakpoints could be found.
170              
171             The list has the following structure:
172              
173             (
174             { nodes => \@nodes, ratio => $ratio },
175             { nodes => \@nodes, ratio => $ratio },
176             ...
177             )
178              
179             The node list in each element will be a list of objects. Each object
180             will be either C, C
181             or C. See below for more on these.
182              
183             The C is the amount of stretch or shrink which should be applied to
184             each glue element in this line. The corrected width of each glue node
185             should be:
186              
187             $node->width + $line->{ratio} *
188             ($line->{ratio} < 0 ? $node->shrink : $node->stretch);
189              
190             Each box, glue or penalty node has a C attribute. Boxes have
191             Cs, which are the text which went into them; glue has C
192             and C to determine how much it should vary in width. That should
193             be all you need for basic typesetting; for more, see the source, and see
194             the original Knuth-Plass paper in "Digital Typography".
195              
196             This method is a thin wrapper around the three methods below.
197              
198             =cut
199              
200             sub typeset {
201 2     2 1 48 my ($t, $paragraph, @args) = @_;
202 2         11 my @nodes = $t->break_text_into_nodes($paragraph, @args);
203 2         13 my @breakpoints = $t->break(\@nodes);
204 2 50       9 return unless @breakpoints;
205 2         10 my @lines = $t->breakpoints_to_lines(\@breakpoints, \@nodes);
206             # Remove final penalty and glue
207 2 50       7 if (@lines) {
208 2         4 pop @{ $lines[-1]->{nodes} } ;
  2         4  
209 2         3 pop @{ $lines[-1]->{nodes} } ;
  2         4  
210             }
211 2         51 @lines;
212             }
213              
214             =head2 break_text_into_nodes
215              
216             This turns a paragraph into a list of box/glue/penalty nodes. It's
217             fairly basic, and designed to be overloaded. It should also support
218             multiple justification styles (centering, ragged right, etc.) but this
219             will come in a future release; right now, it just does full
220             justification.
221              
222             If you are doing clever typography or using non-Western languages you
223             may find that you will want to break text into nodes yourself, and pass
224             the list of nodes to the methods below, instead of using this method.
225              
226             =cut
227              
228             sub _add_word {
229 61     61   90 my ($self, $word, $nodes_r) = @_;
230 61         154 my @elems = $self->hyphenator->hyphenate($word);
231 61         4299 for (0..$#elems) {
232 85         94 push @{$nodes_r}, Text::KnuthPlass::Box->new(
  85         265  
233             width => $self->measure->($elems[$_]),
234             value => $elems[$_]
235             );
236 85 100       341 if ($_ != $#elems) {
237 24         28 push @{$nodes_r}, Text::KnuthPlass::Penalty->new(
  24         73  
238             flagged => 1, penalty => $self->hyphenpenalty);
239             }
240             }
241             }
242              
243             sub break_text_into_nodes {
244 4     4 1 32 my ($self, $text, $style) = @_;
245 4         6 my @nodes;
246 4         33 my @words = split /\s+/, $text;
247              
248 4         19 $self->{emwidth} = $self->measure->("M");
249 4         53 $self->{spacewidth} = $self->measure->(" ");
250 4         46 $self->{spacestretch} = $self->{spacewidth} * $self->space->{width} / $self->space->{stretch};
251 4         71 $self->{spaceshrink} = $self->{spacewidth} * $self->space->{width} / $self->space->{shrink};
252              
253 4   50     76 $style ||= "justify";
254 4         10 my $spacing_type = "_add_space_$style";
255 4         9 my $start = "_start_$style";
256 4         15 $self->$start(\@nodes);
257              
258 4         13 for (0..$#words) { my $word = $words[$_];
  61         97  
259 61         129 $self->_add_word($word, \@nodes);
260 61         184 $self->$spacing_type(\@nodes,$_ == $#words);
261             }
262 4         50 return @nodes;
263             }
264              
265 4     4   7 sub _start_justify { }
266             sub _add_space_justify {
267 61     61   102 my ($self, $nodes_r, $final) = @_;
268 61 100       107 if ($final) {
269 4         4 push @{$nodes_r},
  4         11  
270             $self->glueclass->new(
271             width => 0,
272             stretch => $self->infinity,
273             shrink => 0),
274             $self->penaltyclass->new(width => 0, penalty => -$self->infinity, flagged => 1);
275             } else {
276 57         52 push @{$nodes_r}, $self->glueclass->new(
  57         134  
277             width => $self->{spacewidth},
278             stretch => $self->{spacestretch},
279             shrink => $self->{spaceshrink}
280             );
281             }
282             }
283              
284             sub _start_center {
285 0     0   0 my ($self, $nodes_r) = @_;
286 0         0 push @{$nodes_r},
  0         0  
287             Text::KnuthPlass::Box->new(value => ""),
288             Text::KnuthPlass::Glue->new(
289             width => 0,
290             stretch => 2*$self->{emwidth},
291             shrink => 0)
292             }
293              
294             sub _add_space_center {
295 0     0   0 my ($self, $nodes_r, $final) = @_;
296 0 0       0 if ($final) {
297 0         0 push @{$nodes_r},
  0         0  
298             Text::KnuthPlass::Glue->new( width => 0, stretch => 2*$self->{emwidth}, shrink => 0),
299             Text::KnuthPlass::Penalty->new(width => 0, penalty => -$self->infinity, flagged => 0);
300             } else {
301 0         0 push @{$nodes_r},
  0         0  
302             Text::KnuthPlass::Glue->new( width => 0, stretch => 2*$self->{emwidth}, shrink => 0),
303             Text::KnuthPlass::Penalty->new(width => 0, penalty => 0, flagged => 0),
304             Text::KnuthPlass::Glue->new( width => $self->{spacewidth}, stretch => -4*$self->{emwidth}, shrink => 0),
305             Text::KnuthPlass::Box->new(value => ""),
306             Text::KnuthPlass::Penalty->new(width => 0, penalty => $self->infinity, flagged => 0),
307             Text::KnuthPlass::Glue->new( width => 0, stretch => 2*$self->{emwidth}, shrink => 0),
308             }
309             }
310              
311             =head2 break
312              
313             This implements the main body of the algorithm; it turns a list of nodes
314             (produced from the above method) into a list of breakpoint objects.
315              
316             =cut
317              
318             sub _init_nodelist { # Overridden by XS
319             shift->{activeNodes} = [
320             Text::KnuthPlass::Breakpoint->new(position => 0,
321             demerits => 0,
322             ratio => 0,
323             line => 0,
324             fitnessClass => 0,
325             totals => { width => 0, stretch => 0, shrink => 0}
326             )
327             ];
328             }
329              
330             sub break {
331 2     2 1 4 my ($self, $nodes) = @_;
332 2         8 $self->{sum} = {width => 0, stretch => 0, shrink => 0 };
333 2         37 $self->_init_nodelist();
334 2 50 33     19 if (!$self->{linelengths} or ref $self->{linelengths} ne "ARRAY") {
335 0         0 croak "No linelengths set";
336             }
337              
338 2         6 for (0..$#$nodes) {
339 146         1081 my $node = $nodes->[$_];
340 146 100 33     729 if ($node->isa("Text::KnuthPlass::Box")) {
    100          
    50          
341 72         200 $self->{sum}{width} += $node->width;
342             } elsif ($node->isa("Text::KnuthPlass::Glue")) {
343 54 50 33     305 if ($_ > 0 and $nodes->[$_-1]->isa("Text::KnuthPlass::Box")) {
344 54         6490 $self->_mainloop($node, $_, $nodes);
345             }
346 54         187 $self->{sum}{width} += $node->width;
347 54         596 $self->{sum}{stretch} += $node->stretch;
348 54         506 $self->{sum}{shrink} += $node->shrink;
349             } elsif ($node->is_penalty and $node->penalty != $self->infinity) {
350 20         1403 $self->_mainloop($node, $_, $nodes);
351             }
352             }
353              
354 2         30 my @retval = reverse $self->_active_to_breaks;
355 2         9 $self->_cleanup;
356 2         10 return @retval;
357             }
358              
359             sub _cleanup { }
360              
361             sub _active_to_breaks { # Overridden by XS
362             my $self = shift;
363             return unless @{$self->{activeNodes}};
364             my @breaks;
365             my $tmp = Text::KnuthPlass::Breakpoint->new(demerits => ~0);
366             for (@{$self->{activeNodes}}) { $tmp = $_ if $_->demerits < $tmp->demerits }
367             while ($tmp) {
368             push @breaks, { position => $tmp->position,
369             ratio => $tmp->ratio
370             };
371             $tmp = $tmp->previous
372             }
373             return @breaks;
374             }
375              
376             sub _mainloop {
377             my ($self, $node, $index, $nodes) = @_;
378             my $next; my $ratio = 0; my $demerits = 0; my @candidates;
379             my $badness; my $currentLine = 0; my $tmpSum; my $currentClass = 0;
380             my $active = $self->{activeNodes}[0];
381             my $ptr = 0;
382             while ($active) {
383             @candidates = ( {demerits => ~0}, {demerits => ~0},{demerits => ~0},{demerits => ~0} );
384             warn "Outer\n" if DEBUG;
385             while ($active) {
386             my $next = $self->{activeNodes}[++$ptr];
387             warn "Inner loop\n" if DEBUG;
388             $currentLine = $active->line+1;
389             $ratio = $self->_computeCost($active->position, $index, $active, $currentLine, $nodes);
390             warn "Got a ratio of $ratio, node is ".$node->_txt."\n" if DEBUG;
391             if ($ratio < -1 or
392             ($node->is_penalty and $node->penalty == -$self->infinity)) {
393             warn "Dropping a node\n" if DEBUG;
394             $self->{activeNodes} = [ grep {$_ != $active} @{$self->{activeNodes}} ];
395             $ptr--;
396             }
397             if (-1 <= $ratio and $ratio <= $self->tolerance) {
398             $badness = 100 * $ratio**3;
399             warn "Badness is $badness\n" if DEBUG;
400             if ($node->is_penalty and $node->penalty > 0) {
401             $demerits = ($self->demerits->{line} + $badness +
402             $node->penalty)**2;
403             } elsif ($node->is_penalty and $node->penalty != -$self->infinity) {
404             $demerits = ($self->demerits->{line} + $badness -
405             $node->penalty)**2;
406             } else {
407             $demerits = ($self->demerits->{line} + $badness)**2;
408             }
409              
410             if ($node->is_penalty and $nodes->[$active->position]->is_penalty) {
411             $demerits += $self->demerits->{flagged} *
412             $node->flagged *
413             $nodes->[$active->position]->flagged;
414             }
415              
416             if ($ratio < -0.5) { $currentClass = 0 }
417             elsif ($ratio <= 0.5) { $currentClass = 1 }
418             elsif ($ratio <= 1 ) { $currentClass = 2 }
419             else { $currentClass = 3 }
420              
421             $demerits += $self->demerits->{fitness}
422             if abs($currentClass - $active->fitnessClass) > 1;
423              
424             $demerits += $active->demerits;
425             if ($demerits < $candidates[$currentClass]->{demerits}) {
426             warn "Setting c $currentClass\n" if DEBUG;
427             $candidates[$currentClass] = { active => $active,
428             demerits => $demerits,
429             ratio => $ratio
430             };
431             }
432             }
433             $active = $next;
434             #warn "Active is now $active" if DEBUG;
435             last if !$active or
436             $active->line >= $currentLine;
437             }
438             warn "Post inner loop\n" if DEBUG;
439             $tmpSum = $self->_computeSum($index, $nodes);
440             for (0..3) { my $c = $candidates[$_];
441             if ($c->{demerits} < ~0) {
442             my $newnode = Text::KnuthPlass::Breakpoint->new(
443             position => $index,
444             demerits => $c->{demerits},
445             ratio => $c->{ratio},
446             line => $c->{active}->line + 1,
447             fitnessClass => $_,
448             totals => $tmpSum,
449             previous => $c->{active}
450             );
451             if ($active) {
452             warn "Before\n" if DEBUG;
453             my @newlist;
454             for (@{$self->{activeNodes}}) {
455             if ($_ == $active) { push @newlist, $newnode }
456             push @newlist, $_;
457             }
458             $ptr++;
459             $self->{activeNodes} = [ @newlist ];
460             # grep {;
461             # ($_ == $active) ? ($newnode, $active) : ($_)
462             #} @{$self->{activeNodes}}
463             # ];
464             }
465             else {
466             warn "After\n" if DEBUG;
467             push @{$self->{activeNodes}}, $newnode
468             }
469             #warn @{$self->{activeNodes}} if DEBUG;
470             }
471             }
472             }
473             }
474              
475             sub _computeCost {
476 0     0   0 my ($self, $start, $end, $active, $currentLine, $nodes) = @_;
477 0         0 warn "Computing cost from $start to $end\n" if DEBUG;
478 0         0 warn sprintf "Sum width: %f\n", $self->{sum}{width} if DEBUG;
479 0         0 warn sprintf "Total width: %f\n", $self->{totals}{width} if DEBUG;
480 0         0 my $width = $self->{sum}{width} - $active->totals->{width};
481 0         0 my $stretch = 0; my $shrink = 0;
  0         0  
482 0 0       0 my $linelength = $currentLine < @{$self->linelengths} ?
  0         0  
483             $self->{linelengths}[$currentLine-1] :
484             $self->{linelengths}[-1];
485              
486 0 0 0     0 warn "Adding penalty width" if($nodes->[$end]->is_penalty) and DEBUG;
487 0 0       0 $width += $nodes->[$end]->width if $nodes->[$end]->is_penalty;
488 0         0 warn sprintf "Width %f, linelength %f\n", $width, $linelength if DEBUG;
489 0 0       0 if ($width < $linelength) {
    0          
490 0         0 $stretch = $self->{sum}{stretch} - $active->totals->{stretch};
491 0         0 warn sprintf "Stretch %f\n", $stretch if DEBUG;
492 0 0       0 if ($stretch > 0) {
493 0         0 return ($linelength - $width) / $stretch;
494 0         0 } else { return $self->infinity}
495             } elsif ($width > $linelength) {
496 0         0 $shrink = $self->{sum}{shrink} - $active->totals->{shrink};
497 0         0 warn sprintf "Shrink %f\n", $shrink if DEBUG;
498 0 0       0 if ($shrink > 0) {
499 0         0 return ($linelength - $width) / $shrink;
500 0         0 } else { return $self->infinity}
501 0         0 } else { return 0 }
502             }
503              
504             sub _computeSum {
505 0     0   0 my ($self, $index, $nodes) = @_;
506 0         0 my $result = { width => $self->{sum}{width},
507             stretch => $self->{sum}{stretch}, shrink => $self->{sum}{shrink} };
508 0         0 for ($index..$#$nodes) {
509 0 0 0     0 if ($nodes->[$_]->isa("Text::KnuthPlass::Glue")) {
    0 0        
      0        
510 0         0 $result->{width} += $nodes->[$_]->width;
511 0         0 $result->{stretch} += $nodes->[$_]->stretch;
512 0         0 $result->{shrink} += $nodes->[$_]->shrink;
513             } elsif ($nodes->[$_]->isa("Text::KnuthPlass::Box") or
514             ($nodes->[$_]->is_penalty and $nodes->[$_]->penalty ==
515 0         0 -$self->infinity and $_ > $index)) { last }
516             }
517 0         0 return $result;
518             }
519              
520             =head2 breakpoints_to_lines
521              
522             And this takes the breakpoints and the nodes, and assembles them into
523             lines.
524              
525             =cut
526              
527             sub breakpoints_to_lines {
528 2     2 1 19 my ($self, $breakpoints, $nodes) = @_;
529 2         4 my @lines;
530 2         3 my $linestart = 0;
531 2         9 for my $x (1 .. $#$breakpoints) { $_ = $breakpoints->[$x];
  8         14  
532 8         16 my $position = $_->{position};
533 8         11 my $r = $_->{ratio};
534 8         18 for ($linestart..$#$nodes) {
535 14 100 66     125 if ($nodes->[$_]->isa("Text::KnuthPlass::Box") or
      66        
536             ($nodes->[$_]->is_penalty and $nodes->[$_]->penalty ==-$self->infinity)) {
537 8         10 $linestart = $_;
538 8         13 last;
539             }
540             }
541 8         42 push @lines, { ratio => $r, position => $_->{position},
542 8         27 nodes => [ @{$nodes}[$linestart..$position] ]};
543 8         24 $linestart = $_->{position};
544             }
545             #if ($linestart < $#$nodes) {
546             # push @lines, { ratio => 1, position => $#$nodes,
547             # nodes => [ @{$nodes}[$linestart+1..$#$nodes] ]};
548             #}
549 2         9 return @lines;
550             }
551              
552             =head2 glueclass
553              
554             =head2 penaltyclass
555              
556             For subclassers.
557              
558             =cut
559              
560 61     61 1 239 sub glueclass { "Text::KnuthPlass::Glue" }
561 4     4 1 43 sub penaltyclass { "Text::KnuthPlass::Penalty" }
562              
563             =head1 AUTHOR
564              
565             Simon Cozens, C<< >>
566              
567             =head1 ACKNOWLEDGEMENTS
568              
569             This module is a Perl translation of Bram Stein's Javascript Knuth-Plass
570             implementation. Any bugs, however, are probably my fault.
571              
572             =head1 BUGS
573              
574             Please report any bugs or feature requests to C, or through
575             the web interface at L. I will be notified, and then you'll
576             automatically be notified of progress on your bug as I make changes.
577              
578             =head1 COPYRIGHT & LICENSE
579              
580             Copyright 2011 Simon Cozens.
581              
582             This program is released under the following license: Perl, GPL
583              
584             =cut
585              
586             1; # End of Text::KnuthPlass