File Coverage

blib/lib/DateTime/Format/Builder/Parser.pm
Criterion Covered Total %
statement 155 166 93.3
branch 61 76 80.2
condition 7 15 46.6
subroutine 26 28 92.8
pod 10 20 50.0
total 259 305 84.9


line stmt bran cond sub pod time code
1             package DateTime::Format::Builder::Parser;
2              
3 24     24   91452 use strict;
  24         58  
  24         662  
4 24     24   117 use warnings;
  24         46  
  24         952  
5              
6             our $VERSION = '0.83';
7              
8 24     24   132 use Carp qw( croak );
  24         71  
  24         1089  
9 24         1237 use Params::Validate qw(
10             validate SCALAR CODEREF UNDEF ARRAYREF
11 24     24   654 );
  24         7976  
12 24     24   168 use Scalar::Util qw( weaken );
  24         57  
  24         47768  
13              
14             sub on_fail {
15 4     4 1 11 my ( $self, $input ) = @_;
16 4         14 my $maker = $self->maker;
17 4 50 33     58 if ( $maker and $maker->can('on_fail') ) {
18 4         31 $maker->on_fail($input);
19             }
20             else {
21 0         0 croak __PACKAGE__ . ": Invalid date format: $input";
22             }
23             }
24              
25             sub no_parser {
26 0     0 0 0 croak "No parser set for this parser object.";
27             }
28              
29             sub new {
30 40     40 0 66 my $class = shift;
31 40   33     146 $class = ref($class) || $class;
32              
33 40         150 my $self = bless {
34             on_fail => \&on_fail,
35             parser => \&no_parser,
36             }, $class;
37              
38 40         91 return $self;
39             }
40              
41 4     4 0 10 sub maker { $_[0]->{maker} }
42              
43             sub set_maker {
44 40     40 0 67 my $self = shift;
45 40         63 my $maker = shift;
46              
47 40         109 $self->{maker} = $maker;
48             weaken $self->{maker}
49 40 50       95 if ref $self->{maker};
50              
51 40         63 return $self;
52             }
53              
54             sub fail {
55 5     5 0 17 my ( $self, $parent, $input ) = @_;
56 5         31 $self->{on_fail}->( $self, $input, $parent );
57             }
58              
59             sub parse {
60 52     52 0 21475 my ( $self, $parent, $input, @args ) = @_;
61 52         151 my $r = $self->{parser}->( $parent, $input, @args );
62 52 100       459 $self->fail( $parent, $input ) unless defined $r;
63 49         138 $r;
64             }
65              
66             sub set_parser {
67 40     40 0 95 my ( $self, $parser ) = @_;
68 40         93 $self->{parser} = $parser;
69 40         273 $self;
70             }
71              
72             sub set_fail {
73 1     1 0 3 my ( $self, $fail ) = @_;
74 1         6 $self->{on_fail} = $fail;
75 1         2 $self;
76             }
77              
78             my @callbacks = qw( on_match on_fail postprocess preprocess );
79              
80             {
81              
82             my %params = (
83             common => {
84             length => {
85             type => SCALAR | ARRAYREF,
86             optional => 1,
87             callbacks => {
88             'is an int' => sub { ref $_[0] ? 1 : $_[0] !~ /\D/ },
89             'not empty' => sub { ref $_[0] ? @{ $_[0] } >= 1 : 1 },
90             }
91             },
92              
93             # Stuff used by callbacks
94             label => { type => SCALAR, optional => 1 },
95             (
96             map { $_ => { type => CODEREF | ARRAYREF, optional => 1 } }
97             @callbacks
98             ),
99             },
100             );
101              
102             sub params {
103 65     65 1 104 my $self = shift;
104 65   33     259 my $caller = ref $self || $self;
105 65         163 return { map {%$_} @params{ $caller, 'common' } };
  130         1175  
106             }
107              
108             my $all_params;
109              
110             sub params_all {
111 65 100   65 1 938 return $all_params if defined $all_params;
112 15         53 my %all_params = map {%$_} values %params;
  75         288  
113 15         162 $_->{optional} = 1 for values %all_params;
114 15         333 $all_params = \%all_params;
115             }
116              
117             my %inverse;
118              
119             sub valid_params {
120 96     96 1 226 my $self = shift;
121 96         460 my $from = (caller)[0];
122 96         357 my %args = @_;
123 96         318 $params{$from} = \%args;
124 96         383 for ( keys %args ) {
125              
126             # %inverse contains keys matching all the
127             # possible params; values are the class if and
128             # only if that class is the only one that uses
129             # the given param.
130 192 50       553 $inverse{$_} = exists $inverse{$_} ? undef : $from;
131             }
132 96         192 undef $all_params;
133 96         229 1;
134             }
135              
136             sub whose_params {
137 71     71 1 105 my $param = shift;
138 71         150 return $inverse{$param};
139             }
140             }
141              
142             sub create_single_object {
143 0     0 0 0 my ($self) = shift;
144 0         0 my $obj = $self->new;
145 0         0 my $parser = $self->create_single_parser(@_);
146              
147 0         0 $obj->set_parser($parser);
148             }
149              
150             sub create_single_parser {
151 65     65 1 119 my $class = shift;
152 65 50       166 return $_[0] if ref $_[0] eq 'CODE'; # already code
153 65 50       155 @_ = %{ $_[0] } if ref $_[0] eq 'HASH'; # turn hashref into hash
  0         0  
154             # ordinary boring sort
155 65         204 my %args = validate( @_, params_all() );
156              
157             # Determine variables for ease of reference.
158 65         385 for (@callbacks) {
159 260 100       537 $args{$_} = $class->merge_callbacks( $args{$_} ) if $args{$_};
160             }
161              
162             # Determine parser class
163 65         93 my $from;
164 65         157 for ( keys %args ) {
165 71         147 $from = whose_params($_);
166 71 100 66     294 next if ( not defined $from ) or ( $from eq 'common' );
167 65         106 last;
168             }
169 65 50       173 croak "Could not identify a parsing module to use." unless $from;
170              
171             # Find and call parser creation method
172 65 50       482 my $method = $from->can("create_parser")
173             or croak
174             "Can't create a $_ parser (no appropriate create_parser method)";
175 65         170 my @args = %args;
176 65         213 %args = validate( @args, $from->params );
177 65         391 $from->$method(%args);
178             }
179              
180             sub merge_callbacks {
181 30     30 1 2407 my $self = shift;
182              
183 30 100       64 return unless @_; # No arguments
184 29 100       53 return unless $_[0]; # Irrelevant argument
185 28         54 my @callbacks = @_;
186 28 100       53 if ( @_ == 1 ) {
187 27 100       89 return $_[0] if ref $_[0] eq 'CODE';
188 3 100       8 @callbacks = @{ $_[0] } if ref $_[0] eq 'ARRAY';
  2         5  
189             }
190 4 100       9 return unless @callbacks;
191              
192 3         7 for (@callbacks) {
193 15 100       229 croak "All callbacks must be coderefs!" unless ref $_ eq 'CODE';
194             }
195              
196             return sub {
197 2     2   7 my $rv;
198 2         7 my %args = @_;
199 2         4 for my $cb (@callbacks) {
200 14         26 $rv = $cb->(%args);
201 14 50       43 return $rv unless $rv;
202              
203             # Ugh. Symbiotic. All but postprocessor return the date.
204 14 50       26 $args{input} = $rv unless $args{parsed};
205             }
206 2         11 $rv;
207 2         9 };
208             }
209              
210             sub create_multiple_parsers {
211 40     40 1 76 my $class = shift;
212 40         99 my ( $options, @specs ) = @_;
213              
214 40         128 my $obj = $class->new;
215              
216             # Organise the specs, and transform them into parsers.
217 40         129 my ( $lengths, $others ) = $class->sort_parsers( $options, \@specs );
218              
219             # Merge callbacks if any.
220 40         98 for ('preprocess') {
221             $options->{$_} = $class->merge_callbacks( $options->{$_} )
222 40 100       123 if $options->{$_};
223             }
224              
225             # Custom fail method?
226 40 100       103 $obj->set_fail( $options->{on_fail} ) if exists $options->{on_fail};
227              
228             # Who's our maker?
229 40 50       191 $obj->set_maker( $options->{maker} ) if exists $options->{maker};
230              
231             # We don't want to save the whole options hash as a closure, since
232             # that can cause a circular reference when $options->{maker} is
233             # set.
234 40         65 my $preprocess = $options->{preprocess};
235              
236             # These are the innards of a multi-parser.
237             my $parser = sub {
238 52     52   114 my ( $self, $date, @args ) = @_;
239 52 50       135 return unless defined $date;
240              
241             # Parameters common to the callbacks. Pre-prepared.
242 52 100       185 my %param = (
243             self => $self,
244             ( @args ? ( args => \@args ) : () ),
245             );
246              
247 52         93 my %p;
248              
249             # Preprocess and potentially fill %p
250 52 100       122 if ($preprocess) {
251 7         21 $date = $preprocess->( input => $date, parsed => \%p, %param );
252             }
253              
254             # Find length parser
255 52 100       182 if (%$lengths) {
256 9         15 my $length = length $date;
257 9         19 my $parser = $lengths->{$length};
258 9 100       25 if ($parser) {
259              
260             # Found one, call it with _copy_ of %p
261 8         26 my $dt = $parser->( $self, $date, {%p}, @args );
262 8 50       33 return $dt if defined $dt;
263             }
264             }
265              
266             # Or calls all others, with _copy_ of %p
267 44         93 for my $parser (@$others) {
268 74         219 my $dt = $parser->( $self, $date, {%p}, @args );
269 74 100       251 return $dt if defined $dt;
270             }
271              
272             # Failed, return undef.
273 5         13 return;
274 40         174 };
275 40         121 $obj->set_parser($parser);
276             }
277              
278             sub sort_parsers {
279 40     40 1 64 my $class = shift;
280 40         63 shift;
281 40         74 my ($specs) = @_;
282 40         67 my ( %lengths, @others );
283              
284 40         91 for my $spec (@$specs) {
285              
286             # Put coderefs straight into the 'other' heap.
287 65 50       217 if ( ref $spec eq 'CODE' ) {
    50          
288 0         0 push @others, $spec;
289             }
290              
291             # Specifications...
292             elsif ( ref $spec eq 'HASH' ) {
293 65 100       153 if ( exists $spec->{length} ) {
294 12         40 my $code = $class->create_single_parser(%$spec);
295             my @lengths
296             = ref $spec->{length}
297 1         4 ? @{ $spec->{length} }
298 12 100       39 : ( $spec->{length} );
299 12         24 for my $length (@lengths) {
300 13         20 push @{ $lengths{$length} }, $code;
  13         44  
301             }
302             }
303             else {
304 53         270 push @others, $class->create_single_parser(%$spec);
305             }
306             }
307              
308             # Something else
309             else {
310 0         0 croak "Invalid specification in list.";
311             }
312             }
313              
314 40         169 while ( my ( $length, $parsers ) = each %lengths ) {
315 9         23 $lengths{$length} = $class->chain_parsers($parsers);
316             }
317              
318 40         164 return ( \%lengths, \@others );
319             }
320              
321             sub chain_parsers {
322 9     9 0 17 my ( $self, $parsers ) = @_;
323 9 100       38 return $parsers->[0] if @$parsers == 1;
324             return sub {
325 5     5   20 my $self = shift;
326 5         9 for my $parser (@$parsers) {
327 10         19 my $rv = $self->$parser(@_);
328 10 100       23 return $rv if defined $rv;
329             }
330 0         0 return undef;
331 3         15 };
332             }
333              
334             sub create_parser {
335 40     40 1 122 my $class = shift;
336 40 50       133 if ( not ref $_[0] ) {
337              
338             # Simple case of single specification as a hash
339 0         0 return $class->create_single_object(@_);
340             }
341              
342             # Let's see if we were given an options block
343 40         80 my %options;
344 40         122 while ( ref $_[0] eq 'ARRAY' ) {
345 44         66 my $options = shift;
346 44         193 %options = ( %options, @$options );
347             }
348              
349             # Now, can we create a multi-parser out of the remaining arguments?
350 40 100 66     179 if ( ref $_[0] eq 'HASH' or ref $_[0] eq 'CODE' ) {
351 20         85 return $class->create_multiple_parsers( \%options, @_ );
352             }
353             else {
354             # If it wasn't a HASH or CODE, then it was (ideally)
355             # a list of pairs describing a single specification.
356 20         81 return $class->create_multiple_parsers( \%options, {@_} );
357             }
358             }
359              
360             require DateTime::Format::Builder::Parser::Dispatch;
361             require DateTime::Format::Builder::Parser::generic;
362             require DateTime::Format::Builder::Parser::Quick;
363             require DateTime::Format::Builder::Parser::Regex;
364             require DateTime::Format::Builder::Parser::Strptime;
365              
366             1;
367              
368             # ABSTRACT: Parser creation
369              
370             __END__
371              
372             =pod
373              
374             =encoding UTF-8
375              
376             =head1 NAME
377              
378             DateTime::Format::Builder::Parser - Parser creation
379              
380             =head1 VERSION
381              
382             version 0.83
383              
384             =head1 SYNOPSIS
385              
386             my $class = 'DateTime::Format::Builder::Parser';
387             my $parser = $class->create_single_parser(%specs);
388              
389             =head1 DESCRIPTION
390              
391             This is a utility class for L<DateTime::Format::Builder> that handles creation
392             of parsers. It is to here that C<Builder> delegates most of its
393             responsibilities.
394              
395             =head1 METHODS
396              
397             There are two sorts of methods in this class. Those used by parser
398             implementations and those used by C<Builder>. It is generally unlikely the
399             user will want to use any of them.
400              
401             They are presented, grouped according to use.
402              
403             =head2 Parameter Handling (implementations)
404              
405             These methods allow implementations to have validation of their arguments in a
406             standard manner and due to C<Parser>'s implementation, these methods also
407             allow C<Parser> to determine which implementation to use.
408              
409             =head3 Common parameters
410              
411             These parameters appear for all parser implementations. These are primarily
412             documented in L<DateTime::Format::Builder>.
413              
414             =over 4
415              
416             =item * on_match
417              
418             =item * on_fail
419              
420             =item * postprocess
421              
422             =item * preprocess
423              
424             =item * label
425              
426             =item * length
427              
428             B<length> may be a number or an arrayref of numbers indicating the length of
429             the input. This lets us optimize in the case of static length input. If
430             supplying an arrayref of numbers, please keep the number of numbers to a
431             minimum.
432              
433             =back
434              
435             =head3 params
436              
437             my $params = $self->params;
438             validate( @_, $params );
439              
440             Returns declared parameters and C<common> parameters in a hashref suitable for
441             handing to L<Params::Validate>'s C<validate> function.
442              
443             =head3 params_all
444              
445             my $all_params = $self->params_all;
446              
447             Returns a hash of all the valid options. Not recommended for general use.
448              
449             =head3 valid_params
450              
451             __PACKAGE__->valid_params(%params);
452              
453             Arguments are as per L<Params::Validate>'s C<validate> function. This method
454             is used to declare what your valid arguments are in a parser specification.
455              
456             =head3 whose_params
457              
458             my $class = whose_params( $key );
459              
460             Internal function which merely returns to which class a parameter is
461             unique. If not unique, returns C<undef>.
462              
463             =head2 Organizing and Creating Parsers
464              
465             =head3 create_single_parser
466              
467             This takes a single specification and returns a coderef that is a parser that
468             suits that specification. This is the end of the line for all the parser
469             creation methods. It delegates no further.
470              
471             If a coderef is specified, then that coderef is immediately returned (it is
472             assumed to be appropriate).
473              
474             The single specification (if not a coderef) can be either a hashref or a
475             hash. The keys and values must be as per the specification.
476              
477             It is here that any arrays of callbacks are unified. It is also here that any
478             parser implementations are used. With the spec that's given, the keys are
479             looked at and whichever module is the first to have a unique key in the spec
480             is the one to whom the spec is given.
481              
482             B<Note>: please declare a C<valid_params> argument with an uppercase
483             letter. For example, if you're writing
484             C<DateTime::Format::Builder::Parser::Fnord>, declare a parameter called
485             C<Fnord>. Similarly, C<DTFBP::Strptime> should have C<Strptime> and
486             C<DTFBP::Regex> should have C<Regex>. These latter two don't for backwards
487             compatibility reasons.
488              
489             The returned parser will return either a C<DateTime> object or C<undef>.
490              
491             =head3 merge_callbacks
492              
493             Produce either undef or a single coderef from either undef, an empty array, a
494             single coderef or an array of coderefs
495              
496             =head2 create_multiple_parsers
497              
498             Given the options block (as made from C<create_parser>) and a list of single
499             parser specifications, this returns a coderef that returns either the
500             resultant C<DateTime> object or C<undef>.
501              
502             It first sorts the specifications using C<sort_parsers> and then creates the
503             function based on what that returned.
504              
505             =head2 sort_parsers
506              
507             This takes the list of specifications and sorts them while turning the
508             specifications into parsers. It returns two values: the first is a hashref
509             containing all the length based parsers. The second is an array containing all
510             the other parsers.
511              
512             If any of the specs are not code or hash references, then it will call
513             C<croak>.
514              
515             Code references are put directly into the 'other' array. Any hash references
516             without I<length> keys are run through C<create_single_parser> and the
517             resultant parser is placed in the 'other' array.
518              
519             Hash references B<with> I<length> keys are run through
520             C<create_single_parser>, but the resultant parser is used as the value in the
521             length hashref with the length being the key. If two or more parsers have the
522             same I<length> specified then an error is thrown.
523              
524             =head2 create_parser
525              
526             C<create_class> is mostly a wrapper around C<create_parser> that does loops
527             and stuff and calls C<create_parser> to create the actual parsers.
528              
529             C<create_parser> takes the parser specifications (be they single
530             specifications or multiple specifications) and returns an anonymous coderef
531             that is suitable for use as a method. The coderef will call C<croak> in the
532             event of being unable to parse the single string it expects as input.
533              
534             The simplest input is that of a single specification, presented just as a
535             plain hash, not a hashref. This is passed directly to C<create_single_parser>
536             with the return value from that being wrapped in a function that lets it
537             C<croak> on failure, with that wrapper being returned.
538              
539             If the first argument to C<create_parser> is an arrayref, then that is taken
540             to be an options block (as per the multiple parser specification documented
541             earlier).
542              
543             Any further arguments should be either hashrefs or coderefs. If the first
544             argument after the optional arrayref is not a hashref or coderef then that
545             argument and all remaining arguments are passed off to C<create_single_parser>
546             directly. If the first argument is a hashref or coderef, then it and the
547             remaining arguments are passed to C<create_multiple_parsers>.
548              
549             The resultant coderef from calling either of the creation methods is then
550             wrapped in a function that calls C<croak> in event of failure or the
551             C<DateTime> object in event of success.
552              
553             =head1 FINDING IMPLEMENTATIONS
554              
555             C<Parser> automatically loads any parser classes in C<@INC>.
556              
557             To be loaded automatically, you must be a
558             C<DateTime::Format::Builder::Parser::XXX> module.
559              
560             To be invisible, and not loaded, start your class with a lower class
561             letter. These are ignored.
562              
563             =head1 WRITING A PARSER IMPLEMENTATION
564              
565             =head2 Naming your parser
566              
567             Create a module and name it in the form
568             C<DateTime::Format::Builder::Parser::XXX> where I<XXX> is whatever you like,
569             so long as it doesn't start with a lower case letter.
570              
571             Alternatively, call it something completely different if you don't mind the
572             users explicitly loading your module.
573              
574             I'd recommend keeping within the C<DateTime::Format::Builder> namespace though
575             --- at the time of writing I've not given thought to what non-auto loaded ones
576             should be called. Any ideas, please email me.
577              
578             =head2 Declaring specification arguments
579              
580             Call C<<DateTime::Format::Builder::Parser->valid_params>> with
581             C<Params::Validate> style arguments. For example:
582              
583             DateTime::Format::Builder::Parser->valid_params(
584             params => { type => ARRAYREF },
585             Regex => {
586             type => SCALARREF,
587             callbacks => {
588             'is a regex' => sub { ref(shift) eq 'Regexp' }
589             }
590             }
591             );
592              
593             Start one of the key names with a capital letter. Ideally that key should
594             match the I<XXX> from earlier. This will be used to help identify which module
595             a parser specification should be given to.
596              
597             The key names I<on_match>, I<on_fail>, I<postprocess>, I<preprocess>, I<label>
598             and I<length> are predefined. You are recommended to make use of them. You may
599             ignore I<length> as C<sort_parsers> takes care of that.
600              
601             =head2 Define create_parser
602              
603             A class method of the name C<create_parser> that does the following:
604              
605             Its arguments are as for a normal method (i.e. class as first argument). The
606             other arguments are the result from a call to C<Params::Validate> according to
607             your specification (the C<valid_params> earlier), i.e. a hash of argument name
608             and value.
609              
610             The return value should be a coderef that takes a date string as its first
611             argument and returns either a C<DateTime> object or C<undef>.
612              
613             =head2 Callbacks
614              
615             It is preferred that you support some callbacks to your parsers. In
616             particular, C<preprocess>, C<on_match>, C<on_fail> and C<postprocess>. See the
617             L<main Builder|DateTime::Format::Builder> docs for the appropriate placing of
618             calls to the callbacks.
619              
620             =head1 SEE ALSO
621              
622             C<datetime@perl.org> mailing list.
623              
624             http://datetime.perl.org/
625              
626             L<perl>, L<DateTime>, L<DateTime::Format::Builder>.
627              
628             L<Params::Validate>.
629              
630             L<DateTime::Format::Builder::Parser::generic>,
631             L<DateTime::Format::Builder::Parser::Dispatch>,
632             L<DateTime::Format::Builder::Parser::Quick>,
633             L<DateTime::Format::Builder::Parser::Regex>,
634             L<DateTime::Format::Builder::Parser::Strptime>.
635              
636             =head1 SUPPORT
637              
638             Bugs may be submitted at L<https://github.com/houseabsolute/DateTime-Format-Builder/issues>.
639              
640             I am also usually active on IRC as 'autarch' on C<irc://irc.perl.org>.
641              
642             =head1 SOURCE
643              
644             The source code repository for DateTime-Format-Builder can be found at L<https://github.com/houseabsolute/DateTime-Format-Builder>.
645              
646             =head1 AUTHORS
647              
648             =over 4
649              
650             =item *
651              
652             Dave Rolsky <autarch@urth.org>
653              
654             =item *
655              
656             Iain Truskett <spoon@cpan.org>
657              
658             =back
659              
660             =head1 COPYRIGHT AND LICENSE
661              
662             This software is Copyright (c) 2020 by Dave Rolsky.
663              
664             This is free software, licensed under:
665              
666             The Artistic License 2.0 (GPL Compatible)
667              
668             The full text of the license can be found in the
669             F<LICENSE> file included with this distribution.
670              
671             =cut