File Coverage

blib/lib/String/Formatter.pm
Criterion Covered Total %
statement 138 147 93.8
branch 22 36 61.1
condition 15 24 62.5
subroutine 27 28 96.4
pod 11 13 84.6
total 213 248 85.8


line stmt bran cond sub pod time code
1 5     5   280078 use v5.8.0;
  5         57  
2 5     5   22 use strict;
  5         8  
  5         99  
3 5     5   31 use warnings;
  5         8  
  5         294  
4             package String::Formatter 1.235;
5             # ABSTRACT: build sprintf-like functions of your own
6              
7             #pod =head1 SYNOPSIS
8             #pod
9             #pod use String::Formatter stringf => {
10             #pod -as => 'str_rf',
11             #pod codes => {
12             #pod f => sub { $_ },
13             #pod b => sub { scalar reverse $_ },
14             #pod o => 'Okay?',
15             #pod },
16             #pod };
17             #pod
18             #pod print str_rf('This is %10f and this is %-15b, %o', 'forward', 'backward');
19             #pod
20             #pod ...prints...
21             #pod
22             #pod This is forward and this is drawkcab , okay?
23             #pod
24             #pod =head1 DESCRIPTION
25             #pod
26             #pod String::Formatter is a tool for building sprintf-like formatting routines.
27             #pod It supports named or positional formatting, custom conversions, fixed string
28             #pod interpolation, and simple width-matching out of the box. It is easy to alter
29             #pod its behavior to write new kinds of format string expanders. For most cases, it
30             #pod should be easy to build all sorts of formatters out of the options built into
31             #pod String::Formatter.
32             #pod
33             #pod Normally, String::Formatter will be used to import a sprintf-like routine
34             #pod referred to as "C", but which can be given any name you like. This
35             #pod routine acts like sprintf in that it takes a string and some inputs and returns
36             #pod a new string:
37             #pod
38             #pod my $output = stringf "Some %a format %s for you to %u.\n", { ... };
39             #pod
40             #pod This routine is actually a wrapper around a String::Formatter object created by
41             #pod importing stringf. In the following code, the entire hashref after "stringf"
42             #pod is passed to String::Formatter's constructor (the C method), save for the
43             #pod C<-as> key and any other keys that start with a dash.
44             #pod
45             #pod use String::Formatter
46             #pod stringf => {
47             #pod -as => 'fmt_time',
48             #pod codes => { ... },
49             #pod format_hunker => ...,
50             #pod input_processor => ...,
51             #pod },
52             #pod stringf => {
53             #pod -as => 'fmt_date',
54             #pod codes => { ... },
55             #pod string_replacer => ...,
56             #pod hunk_formatter => ...,
57             #pod },
58             #pod ;
59             #pod
60             #pod As you can see, this will generate two stringf routines, with different
61             #pod behaviors, which are installed with different names. Since the behavior of
62             #pod these routines is based on the C method of a String::Formatter object,
63             #pod the rest of the documentation will describe the way the object behaves.
64             #pod
65             #pod There's also a C export, which behaves just like the C
66             #pod export, but defaults to the C and C
67             #pod arguments. There's a C export, which defaults
68             #pod C and C. Finally, a C,
69             #pod which defaults to C and C. For more
70             #pod on these, keep reading, and check out the cookbook.
71             #pod
72             #pod L provides a number of recipes for ways to put
73             #pod String::Formatter to use.
74             #pod
75             #pod =head1 FORMAT STRINGS
76             #pod
77             #pod Format strings are generally assumed to look like Perl's sprintf's format
78             #pod strings:
79             #pod
80             #pod There's a bunch of normal strings and then %s format %1.4c with %% signs.
81             #pod
82             #pod The exact semantics of the format codes are not totally settled yet -- and they
83             #pod can be replaced on a per-formatter basis. Right now, they're mostly a subset
84             #pod of Perl's astonishingly large and complex system. That subset looks like this:
85             #pod
86             #pod % - a percent sign to begin the format
87             #pod ... - (optional) various modifiers to the format like "-5" or "#" or "2$"
88             #pod {..} - (optional) a string inside braces
89             #pod s - a short string (usually one character) identifying the conversion
90             #pod
91             #pod Not all format modifiers found in Perl's C are yet supported.
92             #pod Currently the only format modifiers must match:
93             #pod
94             #pod (-)? # left-align, rather than right
95             #pod (\d*)? # (optional) minimum field width
96             #pod (?:\.(\d*))? # (optional) maximum field width
97             #pod
98             #pod Some additional format semantics may be added, but probably nothing exotic.
99             #pod Even things like C<2$> and C<*> are probably not going to appear in
100             #pod String::Formatter's default behavior.
101             #pod
102             #pod Another subtle difference, introduced intentionally, is in the handling of
103             #pod C<%%>. With the default String::Formatter behavior, string C<%%> is not
104             #pod interpreted as a formatting code. This is different from the behavior of
105             #pod Perl's C, which interprets it as a special formatting character that
106             #pod doesn't consume input and always acts like the fixed string C<%>. The upshot
107             #pod of this is:
108             #pod
109             #pod sprintf "%%"; # ==> returns "%"
110             #pod stringf "%%"; # ==> returns "%%"
111             #pod
112             #pod sprintf "%10%"; # ==> returns " %"
113             #pod stringf "%10%"; # ==> dies: unknown format code %
114             #pod
115             #pod =cut
116              
117 5     5   1928 use Params::Util ();
  5         25869  
  5         1219  
118             use Sub::Exporter -setup => {
119             exports => {
120             stringf => sub {
121 2         216 my ($class, $name, $arg, $col) = @_;
122 2         5 my $formatter = $class->new($arg);
123 2     2   8 return sub { $formatter->format(@_) };
  2         729  
124             },
125             method_stringf => sub {
126 0         0 my ($class, $name, $arg, $col) = @_;
127 0         0 my $formatter = $class->new({
128             input_processor => 'require_single_input',
129             string_replacer => 'method_replace',
130             %$arg,
131             });
132 0         0 return sub { $formatter->format(@_) };
  0         0  
133             },
134             named_stringf => sub {
135 1         23 my ($class, $name, $arg, $col) = @_;
136 1         4 my $formatter = $class->new({
137             input_processor => 'require_named_input',
138             string_replacer => 'named_replace',
139             %$arg,
140             });
141 1     1   4 return sub { $formatter->format(@_) };
  1         411  
142             },
143             indexed_stringf => sub {
144 1         21 my ($class, $name, $arg, $col) = @_;
145 1         4 my $formatter = $class->new({
146             input_processor => 'require_arrayref_input',
147             string_replacer => 'indexed_replace',
148             %$arg,
149             });
150 1     1   4 return sub { $formatter->format(@_) };
  1         403  
151             },
152             },
153 5     5   2781 };
  5         29304  
  5         92  
154              
155             my %METHODS;
156             BEGIN {
157 5     5   27 %METHODS = (
158             format_hunker => 'hunk_simply',
159             input_processor => 'return_input',
160             string_replacer => 'positional_replace',
161             hunk_formatter => 'format_simply',
162             );
163              
164 5     5   2047 no strict 'refs';
  5         9  
  5         580  
165 5         15 for my $method (keys %METHODS) {
166 20     83   95 *$method = sub { $_[0]->{ $method } };
  83         125  
167              
168 20         41 my $default = "default_$method";
169 20     30   5620 *$default = sub { $METHODS{ $method } };
  30         91  
170             }
171             }
172              
173             #pod =method new
174             #pod
175             #pod my $formatter = String::Formatter->new({
176             #pod codes => { ... },
177             #pod format_hunker => ...,
178             #pod input_processor => ...,
179             #pod string_replacer => ...,
180             #pod hunk_formatter => ...,
181             #pod });
182             #pod
183             #pod This returns a new formatter. The C argument contains the formatting
184             #pod codes for the formatter in the form:
185             #pod
186             #pod codes => {
187             #pod s => 'fixed string',
188             #pod S => 'different string',
189             #pod c => sub { ... },
190             #pod }
191             #pod
192             #pod Code values (or "conversions") should either be strings or coderefs. This
193             #pod hashref can be accessed later with the C method.
194             #pod
195             #pod The other four arguments change how the formatting occurs. Formatting happens
196             #pod in five phases:
197             #pod
198             #pod =for :list
199             #pod 1. format_hunker - format string is broken down into fixed and %-code hunks
200             #pod 2. input_processor - the other inputs are validated and processed
201             #pod 3. string_replacer - replacement strings are generated by using conversions
202             #pod 4. hunk_formatter - replacement strings in hunks are formatted
203             #pod 5. all hunks, now strings, are recombined; this phase is just C
204             #pod
205             #pod The defaults are found by calling C for each helper that
206             #pod isn't given. Values must be either strings (which are interpreted as method
207             #pod names) or coderefs. The semantics for each method are described in the
208             #pod methods' sections, below.
209             #pod
210             #pod =cut
211              
212             sub default_codes {
213 11     11 0 25 return {};
214             }
215              
216             sub new {
217 11     11 1 2106 my ($class, $arg) = @_;
218              
219             my $_codes = {
220 11         36 %{ $class->default_codes },
221 11 50       17 %{ $arg->{codes} || {} },
  11         82  
222             };
223              
224 11         33 my $self = bless { codes => $_codes } => $class;
225              
226 11         48 for (keys %METHODS) {
227 44   66     95 $self->{ $_ } = $arg->{ $_ } || do {
228             my $default_method = "default_$_";
229             $class->$default_method;
230             };
231              
232 44 50       165 $self->{$_} = $self->can($self->{$_}) unless ref $self->{$_};
233             }
234              
235 11         40 my $codes = $self->codes;
236              
237 11         23 return $self;
238             }
239              
240 32     32 0 49 sub codes { $_[0]->{codes} }
241              
242             #pod =method format
243             #pod
244             #pod my $result = $formatter->format( $format_string, @input );
245             #pod
246             #pod print $formatter->format("My %h is full of %e.\n", 'hovercraft', 'eels');
247             #pod
248             #pod This does the actual formatting, calling the methods described above, under
249             #pod C> and returning the result.
250             #pod
251             #pod =cut
252              
253             sub format {
254 21     21 1 4773 my $self = shift;
255 21         35 my $format = shift;
256              
257 21 50       44 Carp::croak("not enough arguments for stringf-based format")
258             unless defined $format;
259              
260 21         39 my $hunker = $self->format_hunker;
261 21         42 my $hunks = $self->$hunker($format);
262              
263 21         41 my $processor = $self->input_processor;
264 21         55 my $input = $self->$processor([ @_ ]);
265              
266 21         42 my $replacer = $self->string_replacer;
267 21         52 $self->$replacer($hunks, $input);
268              
269 20         77 my $formatter = $self->hunk_formatter;
270 20   100     77 ref($_) and $_ = $self->$formatter($_) for @$hunks;
271              
272 20         55 my $string = join q{}, @$hunks;
273              
274 20         57 return $string;
275             }
276              
277             #pod =method format_hunker
278             #pod
279             #pod Format hunkers are passed strings and return arrayrefs containing strings (for
280             #pod fixed content) and hashrefs (for formatting code sections).
281             #pod
282             #pod The hashref hunks should contain at least two entries: C for the
283             #pod conversion code (the s, d, or u in %s, %d, or %u); and C for the
284             #pod complete original text of the hunk. For example, a bare minimum hunker should
285             #pod turn the following:
286             #pod
287             #pod I would like to buy %d %s today.
288             #pod
289             #pod ...into...
290             #pod
291             #pod [
292             #pod 'I would like to buy ',
293             #pod { conversion => 'd', literal => '%d' },
294             #pod ' ',
295             #pod { conversion => 's', literal => '%d' },
296             #pod ' today.',
297             #pod ]
298             #pod
299             #pod Another common entry is C. In the format strings expected by
300             #pod C, for example, these are free strings inside of curly braces.
301             #pod These are used extensively other existing helpers for things liked accessing
302             #pod named arguments or providing method names.
303             #pod
304             #pod =method hunk_simply
305             #pod
306             #pod This is the default format hunker. It implements the format string semantics
307             #pod L.
308             #pod
309             #pod This hunker will produce C and C and C. Its
310             #pod other entries are not yet well-defined for public consumption.
311             #pod
312             #pod =cut
313              
314             my $regex = qr/
315             (% # leading '%'
316             (-)? # left-align, rather than right
317             ([0-9]+)? # (optional) minimum field width
318             (?:\.([0-9]*))? # (optional) maximum field width
319             (?:{(.*?)})? # (optional) stuff inside
320             (\S) # actual format character
321             )
322             /x;
323              
324             sub hunk_simply {
325 21     21 1 37 my ($self, $string) = @_;
326              
327 21         24 my @to_fmt;
328 21         27 my $pos = 0;
329              
330 21         306 while ($string =~ m{\G(.*?)$regex}gs) {
331 36         201 push @to_fmt, $1, {
332             alignment => $3,
333             min_width => $4,
334             max_width => $5,
335              
336             literal => $2,
337             argument => $6,
338             conversion => $7,
339             };
340              
341 36 100       79 $to_fmt[-1] = '%' if $to_fmt[-1]{literal} eq '%%';
342              
343 36         166 $pos = pos $string;
344             }
345              
346 21 100       68 push @to_fmt, substr $string, $pos if $pos < length $string;
347              
348 21         49 return \@to_fmt;
349             }
350              
351             #pod =method input_processor
352             #pod
353             #pod The input processor is responsible for inspecting the post-format-string
354             #pod arguments, validating them, and returning them in a possibly-transformed form.
355             #pod The processor is passed an arrayref containing the arguments and should return
356             #pod a scalar value to be used as the input going forward.
357             #pod
358             #pod =method return_input
359             #pod
360             #pod This input processor, the default, simply returns the input it was given with
361             #pod no validation or transformation.
362             #pod
363             #pod =cut
364              
365             sub return_input {
366 14     14 1 21 return $_[1];
367             }
368              
369             #pod =method require_named_input
370             #pod
371             #pod This input processor will raise an exception unless there is exactly one
372             #pod post-format-string argument to the format call, and unless that argument is a
373             #pod hashref. It will also replace the arrayref with the given hashref so
374             #pod subsequent phases of the format can avoid lots of needless array dereferencing.
375             #pod
376             #pod =cut
377              
378             sub require_named_input {
379 3     3 1 7 my ($self, $args) = @_;
380              
381 3 50 33     21 Carp::croak("routine must be called with exactly one hashref arg")
382             if @$args != 1 or ! Params::Util::_HASHLIKE($args->[0]);
383              
384 3         6 return $args->[0];
385             }
386              
387             #pod =method require_arrayref_input
388             #pod
389             #pod This input processor will raise an exception unless there is exactly one
390             #pod post-format-string argument to the format call, and unless that argument is a
391             #pod arrayref. It will also replace the input with that single arrayref it found so
392             #pod subsequent phases of the format can avoid lots of needless array dereferencing.
393             #pod
394             #pod =cut
395              
396             sub require_arrayref_input {
397 1     1 1 3 my ($self, $args) = @_;
398              
399 1 50 33     8 Carp::croak("routine must be called with exactly one arrayref arg")
400             if @$args != 1 or ! Params::Util::_ARRAYLIKE($args->[0]);
401              
402 1         2 return $args->[0];
403             }
404              
405             #pod =method require_single_input
406             #pod
407             #pod This input processor will raise an exception if more than one input is given.
408             #pod After input processing, the single element in the input will be used as the
409             #pod input itself.
410             #pod
411             #pod =cut
412              
413             sub require_single_input {
414 3     3 1 7 my ($self, $args) = @_;
415              
416 3 50       9 Carp::croak("routine must be called with exactly one argument after string")
417             if @$args != 1;
418              
419 3         5 return $args->[0];
420             }
421              
422             #pod =method forbid_input
423             #pod
424             #pod This input processor will raise an exception if any input is given. In other
425             #pod words, formatters with this input processor accept format strings and nothing
426             #pod else.
427             #pod
428             #pod =cut
429              
430             sub forbid_input {
431 0     0 1 0 my ($self, $args) = @_;
432              
433 0 0       0 Carp::croak("routine must be called with no arguments after format string")
434             if @$args;
435              
436 0         0 return $args;
437             }
438              
439             #pod =method string_replacer
440             #pod
441             #pod The string_replacer phase is responsible for adding a C entry to
442             #pod format code hunks. This should be a string-value entry that will be formatted
443             #pod and concatenated into the output string. String replacers can also replace the
444             #pod whole hunk with a string to avoid any subsequent formatting.
445             #pod
446             #pod =method positional_replace
447             #pod
448             #pod This replacer matches inputs to the hunk's position in the format string. This
449             #pod is the default replacer, used in the L, which should
450             #pod make its behavior clear. At present, fixed-string conversions B affect
451             #pod the position of arg matched, meaning that given the following:
452             #pod
453             #pod my $formatter = String::Formatter->new({
454             #pod codes => {
455             #pod f => 'fixed string',
456             #pod s => sub { ... },
457             #pod }
458             #pod });
459             #pod
460             #pod $formatter->format("%s %f %s", 1, 2);
461             #pod
462             #pod The subroutine is called twice, once for the input C<1> and once for the input
463             #pod C<2>. B after some more experimental use.
464             #pod
465             #pod =method named_replace
466             #pod
467             #pod This replacer should be used with the C input processor.
468             #pod It expects the input to be a hashref and it finds values to be interpolated by
469             #pod looking in the hashref for the brace-enclosed name on each format code. Here's
470             #pod an example use:
471             #pod
472             #pod $formatter->format("This was the %{adj}s day in %{num}d weeks.", {
473             #pod adj => 'best',
474             #pod num => 6,
475             #pod });
476             #pod
477             #pod =method indexed_replace
478             #pod
479             #pod This replacer should be used with the C input
480             #pod processor. It expects the input to be an arrayref and it finds values to be
481             #pod interpolated by looking in the arrayref for the brace-enclosed index on each
482             #pod format code. Here's an example use:
483             #pod
484             #pod $formatter->format("This was the %{1}s day in %{0}d weeks.", [ 6, 'best' ]);
485             #pod
486             #pod =cut
487              
488             sub __closure_replace {
489 15     15   29 my ($closure) = @_;
490              
491             return sub {
492 18     18   32 my ($self, $hunks, $input) = @_;
493              
494 18         26 my $heap = {};
495 18         29 my $code = $self->codes;
496              
497 18         56 for my $i (grep { ref $hunks->[$_] } 0 .. $#$hunks) {
  75         146  
498 28         56 my $hunk = $hunks->[ $i ];
499 28         45 my $conv = $code->{ $hunk->{conversion} };
500              
501 28 100       201 Carp::croak("Unknown conversion in stringf: $hunk->{conversion}")
502             unless defined $conv;
503              
504 27 100       46 if (ref $conv) {
505 13         41 $hunks->[ $i ]->{replacement} = $self->$closure({
506             conv => $conv,
507             hunk => $hunk,
508             heap => $heap,
509             input => $input,
510             });
511             } else {
512 14         31 $hunks->[ $i ]->{replacement} = $conv;
513             }
514             }
515 15         2169 };
516             }
517              
518             # $self->$string_replacer($hunks, $input);
519             BEGIN {
520             *positional_replace = __closure_replace(sub {
521 5         8 my ($self, $arg) = @_;
522 5         15 local $_ = $arg->{input}->[ $arg->{heap}{nth}++ ];
523 5         14 return $arg->{conv}->($self, $_, $arg->{hunk}{argument});
524 5     5   50 });
525              
526             *named_replace = __closure_replace(sub {
527 6         11 my ($self, $arg) = @_;
528 6         13 local $_ = $arg->{input}->{ $arg->{hunk}{argument} };
529 6         16 return $arg->{conv}->($self, $_, $arg->{hunk}{argument});
530 5         23 });
531              
532             *indexed_replace = __closure_replace(sub {
533 2         3 my ($self, $arg) = @_;
534 2         6 local $_ = $arg->{input}->[ $arg->{hunk}{argument} ];
535 2         5 return $arg->{conv}->($self, $_, $arg->{hunk}{argument});
536 5         15 });
537             }
538              
539             #pod =method method_replace
540             #pod
541             #pod This string replacer method expects the input to be a single value on which
542             #pod methods can be called. If a value was given in braces to the format code, it
543             #pod is passed as an argument.
544             #pod
545             #pod =cut
546              
547             # should totally be rewritten with commonality with keyed_replace factored out
548             sub method_replace {
549 2     2 1 4 my ($self, $hunks, $input) = @_;
550              
551 2         3 my $heap = {};
552 2         4 my $code = $self->codes;
553              
554 2         6 for my $i (grep { ref $hunks->[$_] } 0 .. $#$hunks) {
  10         18  
555 5         17 my $hunk = $hunks->[ $i ];
556 5         11 my $conv = $code->{ $hunk->{conversion} };
557              
558 5 50       7 Carp::croak("Unknown conversion in stringf: $hunk->{conversion}")
559             unless defined $conv;
560              
561 5 100       10 if (ref $conv) {
562 3         4 local $_ = $input;
563 3         7 $hunks->[ $i ]->{replacement} = $input->$conv($hunk->{argument});
564             } else {
565 2         2 local $_ = $input;
566             $hunks->[ $i ]->{replacement} = $input->$conv(
567             defined $hunk->{argument} ? $hunk->{argument} : ()
568 2 100       9 );
569             }
570             }
571             }
572              
573             #pod =method keyed_replace
574             #pod
575             #pod This string replacer method expects the input to be a single hashref. Coderef
576             #pod code values are used as callbacks, but strings are used as hash keys. If a
577             #pod value was given in braces to the format code, it is ignored.
578             #pod
579             #pod For example if the codes contain C<< i => 'ident' >> then C<%i> in the format
580             #pod string will be replaced with C<< $input->{ident} >> in the output.
581             #pod
582             #pod =cut
583              
584             # should totally be rewritten with commonality with method_replace factored out
585             sub keyed_replace {
586 1     1 1 2 my ($self, $hunks, $input) = @_;
587              
588 1         2 my $heap = {};
589 1         2 my $code = $self->codes;
590              
591 1         3 for my $i (grep { ref $hunks->[$_] } 0 .. $#$hunks) {
  4         9  
592 2         2 my $hunk = $hunks->[ $i ];
593 2         5 my $conv = $code->{ $hunk->{conversion} };
594              
595 2 50       3 Carp::croak("Unknown conversion in stringf: $hunk->{conversion}")
596             unless defined $conv;
597              
598 2 50       6 if (ref $conv) {
599 0         0 local $_ = $input;
600 0         0 $hunks->[ $i ]->{replacement} = $input->$conv($hunk->{argument});
601             } else {
602 2         3 local $_ = $input;
603 2         5 $hunks->[ $i ]->{replacement} = $input->{$conv};
604             }
605             }
606             }
607              
608             #pod =method hunk_formatter
609             #pod
610             #pod The hunk_formatter processes each the hashref hunks left after string
611             #pod replacement and returns a string. When it is called, it is passed a hunk
612             #pod hashref and must return a string.
613             #pod
614             #pod =method format_simply
615             #pod
616             #pod This is the default hunk formatter. It deals with minimum and maximum width
617             #pod cues as well as left and right alignment. Beyond that, it does no formatting
618             #pod of the replacement string.
619             #pod
620             #pod =cut
621              
622             sub format_simply {
623 34     34 1 54 my ($self, $hunk) = @_;
624              
625 34         50 my $replacement = $hunk->{replacement};
626 34         41 my $replength = length $replacement;
627              
628 34   100     83 my $alignment = $hunk->{alignment} || '';
629 34   100     73 my $min_width = $hunk->{min_width} || 0;
630 34   66     73 my $max_width = $hunk->{max_width} || $replength;
631              
632 34 50 66     104 $min_width ||= $replength > $min_width ? $min_width : $replength;
633 34 0 33     51 $max_width ||= $max_width > $replength ? $max_width : $replength;
634              
635 34         196 return sprintf "%$alignment${min_width}.${max_width}s", $replacement;
636             }
637              
638             1;
639              
640             #pod =begin :postlude
641             #pod
642             #pod =head1 HISTORY
643             #pod
644             #pod String::Formatter is based on L, written by
645             #pod Darren Chamberlain. For a history of the code, check the project's source code
646             #pod repository. All bugs should be reported to Ricardo Signes and
647             #pod String::Formatter. Very little of the original code remains.
648             #pod
649             #pod =end :postlude
650             #pod
651             #pod =for Pod::Coverage
652             #pod codes
653             #pod default_format_hunker
654             #pod default_input_processor
655             #pod default_string_replacer
656             #pod default_hunk_formatter
657             #pod
658              
659             __END__