File Coverage

blib/lib/DateTime/Format/Builder/Parser.pm
Criterion Covered Total %
statement 155 166 93.3
branch 61 76 80.2
condition 7 15 46.6
subroutine 26 28 92.8
pod 9 20 45.0
total 258 305 84.5


line stmt bran cond sub pod time code
1             package DateTime::Format::Builder::Parser;
2              
3 24     24   164491 use strict;
  24         2773  
  24         809  
4 24     24   2386 use warnings;
  24         2258  
  24         1038  
5              
6             our $VERSION = '0.82';
7              
8 24     24   135 use Carp qw( croak );
  24         57  
  24         1264  
9 24         1612 use Params::Validate qw(
10             validate SCALAR CODEREF UNDEF ARRAYREF
11 24     24   2998 );
  24         9086  
12 24     24   171 use Scalar::Util qw( weaken );
  24         80  
  24         50434  
13              
14             sub on_fail {
15 4     4 0 12 my ( $self, $input ) = @_;
16 4         15 my $maker = $self->maker;
17 4 50 33     60 if ( $maker and $maker->can('on_fail') ) {
18 4         37 $maker->on_fail($input);
19             }
20             else {
21 0         0 croak __PACKAGE__ . ": Invalid date format: $input";
22             }
23             }
24              
25             sub no_parser {
26 0     0 0 0 croak "No parser set for this parser object.";
27             }
28              
29             sub new {
30 40     40 0 74 my $class = shift;
31 40   33     158 $class = ref($class) || $class;
32              
33 40         168 my $self = bless {
34             on_fail => \&on_fail,
35             parser => \&no_parser,
36             }, $class;
37              
38 40         97 return $self;
39             }
40              
41 4     4 0 12 sub maker { $_[0]->{maker} }
42              
43             sub set_maker {
44 40     40 0 75 my $self = shift;
45 40         71 my $maker = shift;
46              
47 40         119 $self->{maker} = $maker;
48             weaken $self->{maker}
49 40 50       110 if ref $self->{maker};
50              
51 40         78 return $self;
52             }
53              
54             sub fail {
55 5     5 0 18 my ( $self, $parent, $input ) = @_;
56 5         37 $self->{on_fail}->( $self, $input, $parent );
57             }
58              
59             sub parse {
60 52     52 0 6664 my ( $self, $parent, $input, @args ) = @_;
61 52         151 my $r = $self->{parser}->( $parent, $input, @args );
62 52 100       157 $self->fail( $parent, $input ) unless defined $r;
63 49         161 $r;
64             }
65              
66             sub set_parser {
67 40     40 0 101 my ( $self, $parser ) = @_;
68 40         75 $self->{parser} = $parser;
69 40         269 $self;
70             }
71              
72             sub set_fail {
73 1     1 0 4 my ( $self, $fail ) = @_;
74 1         6 $self->{on_fail} = $fail;
75 1         2 $self;
76             }
77              
78             my @callbacks = qw( on_match on_fail postprocess preprocess );
79              
80             {
81              
82             my %params = (
83             common => {
84             length => {
85             type => SCALAR | ARRAYREF,
86             optional => 1,
87             callbacks => {
88             'is an int' => sub { ref $_[0] ? 1 : $_[0] !~ /\D/ },
89             'not empty' => sub { ref $_[0] ? @{ $_[0] } >= 1 : 1 },
90             }
91             },
92              
93             # Stuff used by callbacks
94             label => { type => SCALAR, optional => 1 },
95             (
96             map { $_ => { type => CODEREF | ARRAYREF, optional => 1 } }
97             @callbacks
98             ),
99             },
100             );
101              
102             sub params {
103 65     65 1 118 my $self = shift;
104 65   33     216 my $caller = ref $self || $self;
105 65         184 return { map {%$_} @params{ $caller, 'common' } };
  130         1359  
106             }
107              
108             my $all_params;
109              
110             sub params_all {
111 65 100   65 1 1039 return $all_params if defined $all_params;
112 15         62 my %all_params = map {%$_} values %params;
  75         323  
113 15         190 $_->{optional} = 1 for values %all_params;
114 15         361 $all_params = \%all_params;
115             }
116              
117             my %inverse;
118              
119             sub valid_params {
120 96     96 1 245 my $self = shift;
121 96         372 my $from = (caller)[0];
122 96         403 my %args = @_;
123 96         278 $params{$from} = \%args;
124 96         398 for ( keys %args ) {
125              
126             # %inverse contains keys matching all the
127             # possible params; values are the class if and
128             # only if that class is the only one that uses
129             # the given param.
130 192 50       621 $inverse{$_} = exists $inverse{$_} ? undef : $from;
131             }
132 96         215 undef $all_params;
133 96         262 1;
134             }
135              
136             sub whose_params {
137 81     81 1 143 my $param = shift;
138 81         177 return $inverse{$param};
139             }
140             }
141              
142             sub create_single_object {
143 0     0 0 0 my ($self) = shift;
144 0         0 my $obj = $self->new;
145 0         0 my $parser = $self->create_single_parser(@_);
146              
147 0         0 $obj->set_parser($parser);
148             }
149              
150             sub create_single_parser {
151 65     65 1 133 my $class = shift;
152 65 50       170 return $_[0] if ref $_[0] eq 'CODE'; # already code
153 65 50       166 @_ = %{ $_[0] } if ref $_[0] eq 'HASH'; # turn hashref into hash
  0         0  
154             # ordinary boring sort
155 65         191 my %args = validate( @_, params_all() );
156              
157             # Determine variables for ease of reference.
158 65         415 for (@callbacks) {
159 260 100       649 $args{$_} = $class->merge_callbacks( $args{$_} ) if $args{$_};
160             }
161              
162             # Determine parser class
163 65         107 my $from;
164 65         171 for ( keys %args ) {
165 81         183 $from = whose_params($_);
166 81 100 66     344 next if ( not defined $from ) or ( $from eq 'common' );
167 65         154 last;
168             }
169 65 50       207 croak "Could not identify a parsing module to use." unless $from;
170              
171             # Find and call parser creation method
172 65 50       537 my $method = $from->can("create_parser")
173             or croak
174             "Can't create a $_ parser (no appropriate create_parser method)";
175 65         194 my @args = %args;
176 65         245 %args = validate( @args, $from->params() );
177 65         453 $from->$method(%args);
178             }
179              
180             sub merge_callbacks {
181 30     30 1 2682 my $self = shift;
182              
183 30 100       86 return unless @_; # No arguments
184 29 100       65 return unless $_[0]; # Irrelevant argument
185 28         62 my @callbacks = @_;
186 28 100       70 if ( @_ == 1 ) {
187 27 100       104 return $_[0] if ref $_[0] eq 'CODE';
188 3 100       10 @callbacks = @{ $_[0] } if ref $_[0] eq 'ARRAY';
  2         5  
189             }
190 4 100       13 return unless @callbacks;
191              
192 3         7 for (@callbacks) {
193 15 100       232 croak "All callbacks must be coderefs!" unless ref $_ eq 'CODE';
194             }
195              
196             return sub {
197 2     2   8 my $rv;
198 2         7 my %args = @_;
199 2         5 for my $cb (@callbacks) {
200 14         31 $rv = $cb->(%args);
201 14 50       51 return $rv unless $rv;
202              
203             # Ugh. Symbiotic. All but postprocessor return the date.
204 14 50       35 $args{input} = $rv unless $args{parsed};
205             }
206 2         10 $rv;
207 2         16 };
208             }
209              
210             sub create_multiple_parsers {
211 40     40 1 77 my $class = shift;
212 40         100 my ( $options, @specs ) = @_;
213              
214 40         125 my $obj = $class->new;
215              
216             # Organise the specs, and transform them into parsers.
217 40         153 my ( $lengths, $others ) = $class->sort_parsers( $options, \@specs );
218              
219             # Merge callbacks if any.
220 40         104 for ('preprocess') {
221             $options->{$_} = $class->merge_callbacks( $options->{$_} )
222 40 100       142 if $options->{$_};
223             }
224              
225             # Custom fail method?
226 40 100       107 $obj->set_fail( $options->{on_fail} ) if exists $options->{on_fail};
227              
228             # Who's our maker?
229 40 50       191 $obj->set_maker( $options->{maker} ) if exists $options->{maker};
230              
231             # We don't want to save the whole options hash as a closure, since
232             # that can cause a circular reference when $options->{maker} is
233             # set.
234 40         75 my $preprocess = $options->{preprocess};
235              
236             # These are the innards of a multi-parser.
237             my $parser = sub {
238 52     52   121 my ( $self, $date, @args ) = @_;
239 52 50       143 return unless defined $date;
240              
241             # Parameters common to the callbacks. Pre-prepared.
242 52 100       203 my %param = (
243             self => $self,
244             ( @args ? ( args => \@args ) : () ),
245             );
246              
247 52         98 my %p;
248              
249             # Preprocess and potentially fill %p
250 52 100       130 if ($preprocess) {
251 7         26 $date = $preprocess->( input => $date, parsed => \%p, %param );
252             }
253              
254             # Find length parser
255 52 100       197 if (%$lengths) {
256 9         18 my $length = length $date;
257 9         20 my $parser = $lengths->{$length};
258 9 100       28 if ($parser) {
259              
260             # Found one, call it with _copy_ of %p
261 8         27 my $dt = $parser->( $self, $date, {%p}, @args );
262 8 50       39 return $dt if defined $dt;
263             }
264             }
265              
266             # Or calls all others, with _copy_ of %p
267 44         100 for my $parser (@$others) {
268 74         253 my $dt = $parser->( $self, $date, {%p}, @args );
269 74 100       301 return $dt if defined $dt;
270             }
271              
272             # Failed, return undef.
273 5         17 return;
274 40         203 };
275 40         134 $obj->set_parser($parser);
276             }
277              
278             sub sort_parsers {
279 40     40 1 78 my $class = shift;
280 40         63 shift;
281 40         84 my ($specs) = @_;
282 40         75 my ( %lengths, @others );
283              
284 40         98 for my $spec (@$specs) {
285              
286             # Put coderefs straight into the 'other' heap.
287 65 50       244 if ( ref $spec eq 'CODE' ) {
    50          
288 0         0 push @others, $spec;
289             }
290              
291             # Specifications...
292             elsif ( ref $spec eq 'HASH' ) {
293 65 100       178 if ( exists $spec->{length} ) {
294 12         46 my $code = $class->create_single_parser(%$spec);
295             my @lengths
296             = ref $spec->{length}
297 1         3 ? @{ $spec->{length} }
298 12 100       46 : ( $spec->{length} );
299 12         26 for my $length (@lengths) {
300 13         21 push @{ $lengths{$length} }, $code;
  13         51  
301             }
302             }
303             else {
304 53         219 push @others, $class->create_single_parser(%$spec);
305             }
306             }
307              
308             # Something else
309             else {
310 0         0 croak "Invalid specification in list.";
311             }
312             }
313              
314 40         195 while ( my ( $length, $parsers ) = each %lengths ) {
315 9         27 $lengths{$length} = $class->chain_parsers($parsers);
316             }
317              
318 40         137 return ( \%lengths, \@others );
319             }
320              
321             sub chain_parsers {
322 9     9 0 19 my ( $self, $parsers ) = @_;
323 9 100       39 return $parsers->[0] if @$parsers == 1;
324             return sub {
325 5     5   9 my $self = shift;
326 5         10 for my $parser (@$parsers) {
327 10         27 my $rv = $self->$parser(@_);
328 10 100       30 return $rv if defined $rv;
329             }
330 0         0 return undef;
331 3         17 };
332             }
333              
334             sub create_parser {
335 40     40 1 78 my $class = shift;
336 40 50       136 if ( not ref $_[0] ) {
337              
338             # Simple case of single specification as a hash
339 0         0 return $class->create_single_object(@_);
340             }
341              
342             # Let's see if we were given an options block
343 40         76 my %options;
344 40         129 while ( ref $_[0] eq 'ARRAY' ) {
345 44         73 my $options = shift;
346 44         210 %options = ( %options, @$options );
347             }
348              
349             # Now, can we create a multi-parser out of the remaining arguments?
350 40 100 66     193 if ( ref $_[0] eq 'HASH' or ref $_[0] eq 'CODE' ) {
351 20         90 return $class->create_multiple_parsers( \%options, @_ );
352             }
353             else {
354             # If it wasn't a HASH or CODE, then it was (ideally)
355             # a list of pairs describing a single specification.
356 20         91 return $class->create_multiple_parsers( \%options, {@_} );
357             }
358             }
359              
360             require DateTime::Format::Builder::Parser::Dispatch;
361             require DateTime::Format::Builder::Parser::generic;
362             require DateTime::Format::Builder::Parser::Quick;
363             require DateTime::Format::Builder::Parser::Regex;
364             require DateTime::Format::Builder::Parser::Strptime;
365              
366             1;
367              
368             # ABSTRACT: Parser creation
369              
370             __END__
371              
372             =pod
373              
374             =encoding UTF-8
375              
376             =head1 NAME
377              
378             DateTime::Format::Builder::Parser - Parser creation
379              
380             =head1 VERSION
381              
382             version 0.82
383              
384             =head1 SYNOPSIS
385              
386             my $class = 'DateTime::Format::Builder::Parser';
387             my $parser = $class->create_single_parser( %specs );
388              
389             =head1 DESCRIPTION
390              
391             This is a utility class for L<DateTime::Format::Builder> that
392             handles creation of parsers. It is to here that C<Builder> delegates
393             most of its responsibilities.
394              
395             =head1 METHODS
396              
397             There are two sorts of methods in this class. Those used by
398             parser implementations and those used by C<Builder>. It is
399             generally unlikely the user will want to use any of them.
400              
401             They are presented, grouped according to use.
402              
403             =head2 Parameter Handling (implementations)
404              
405             These methods allow implementations to have validation of
406             their arguments in a standard manner and due to C<Parser>'s
407             implementation, these methods also allow C<Parser> to
408             determine which implementation to use.
409              
410             =head3 Common parameters
411              
412             These parameters appear for all parser implementations.
413             These are primarily documented in
414             L<DateTime::Format::Builder>.
415              
416             =over 4
417              
418             =item *
419              
420             B<on_match>
421              
422             =item *
423              
424             B<on_fail>
425              
426             =item *
427              
428             B<postprocess>
429              
430             =item *
431              
432             B<preprocess>
433              
434             =item *
435              
436             B<label>
437              
438             =item *
439              
440             B<length> may be a number or an arrayref of numbers
441             indicating the length of the input. This lets us optimize in
442             the case of static length input. If supplying an arrayref of
443             numbers, please keep the number of numbers to a minimum.
444              
445             =back
446              
447             =head3 params
448              
449             my $params = $self->params();
450             validate( @_, $params );
451              
452             Returns declared parameters and C<common> parameters in a hashref
453             suitable for handing to L<Params::Validate>'s C<validate> function.
454              
455             =head3 params_all
456              
457             my $all_params = $self->params_all();
458              
459             Returns a hash of all the valid options. Not recommended
460             for general use.
461              
462             =head3 valid_params
463              
464             __PACKAGE__->valid_params( %params );
465              
466             Arguments are as per L<Params::Validate>'s C<validate> function.
467             This method is used to declare what your valid arguments are in
468             a parser specification.
469              
470             =head3 whose_params
471              
472             my $class = whose_params( $key );
473              
474             Internal function which merely returns to which class a
475             parameter is unique. If not unique, returns C<undef>.
476              
477             =head2 Organizing and Creating Parsers
478              
479             =head3 create_single_parser
480              
481             This takes a single specification and returns a coderef that
482             is a parser that suits that specification. This is the end
483             of the line for all the parser creation methods. It
484             delegates no further.
485              
486             If a coderef is specified, then that coderef is immediately
487             returned (it is assumed to be appropriate).
488              
489             The single specification (if not a coderef) can be either a
490             hashref or a hash. The keys and values must be as per the
491             specification.
492              
493             It is here that any arrays of callbacks are unified. It is
494             also here that any parser implementations are used. With
495             the spec that's given, the keys are looked at and whichever
496             module is the first to have a unique key in the spec is the
497             one to whom the spec is given.
498              
499             B<Note>: please declare a C<valid_params> argument with an
500             uppercase letter. For example, if you're writing
501             C<DateTime::Format::Builder::Parser::Fnord>, declare a
502             parameter called C<Fnord>. Similarly, C<DTFBP::Strptime>
503             should have C<Strptime> and C<DTFBP::Regex> should have
504             C<Regex>. These latter two don't for backwards compatibility
505             reasons.
506              
507             The returned parser will return either a C<DateTime> object
508             or C<undef>.
509              
510             =head3 merge_callbacks
511              
512             Produce either undef or a single coderef from either undef,
513             an empty array, a single coderef or an array of coderefs
514              
515             =head2 create_multiple_parsers
516              
517             Given the options block (as made from C<create_parser()>)
518             and a list of single parser specifications, this returns a
519             coderef that returns either the resultant C<DateTime> object
520             or C<undef>.
521              
522             It first sorts the specifications using C<sort_parsers()>
523             and then creates the function based on what that returned.
524              
525             =head2 sort_parsers
526              
527             This takes the list of specifications and sorts them while
528             turning the specifications into parsers. It returns two
529             values: the first is a hashref containing all the length
530             based parsers. The second is an array containing all the
531             other parsers.
532              
533             If any of the specs are not code or hash references, then it
534             will call C<croak()>.
535              
536             Code references are put directly into the 'other' array. Any
537             hash references without I<length> keys are run through
538             C<create_single_parser()> and the resultant parser is placed
539             in the 'other' array.
540              
541             Hash references B<with> I<length> keys are run through
542             C<create_single_parser()>, but the resultant parser is used
543             as the value in the length hashref with the length being the
544             key. If two or more parsers have the same I<length>
545             specified then an error is thrown.
546              
547             =head2 create_parser
548              
549             C<create_class()> is mostly a wrapper around
550             C<create_parser()> that does loops and stuff and calls
551             C<create_parser()> to create the actual parsers.
552              
553             C<create_parser()> takes the parser specifications (be they
554             single specifications or multiple specifications) and
555             returns an anonymous coderef that is suitable for use as a
556             method. The coderef will call C<croak()> in the event of
557             being unable to parse the single string it expects as input.
558              
559             The simplest input is that of a single specification,
560             presented just as a plain hash, not a hashref. This is
561             passed directly to C<create_single_parser()> with the return
562             value from that being wrapped in a function that lets it
563             C<croak()> on failure, with that wrapper being returned.
564              
565             If the first argument to C<create_parser()> is an arrayref,
566             then that is taken to be an options block (as per the
567             multiple parser specification documented earlier).
568              
569             Any further arguments should be either hashrefs or coderefs.
570             If the first argument after the optional arrayref is not a
571             hashref or coderef then that argument and all remaining
572             arguments are passed off to C<create_single_parser()>
573             directly. If the first argument is a hashref or coderef,
574             then it and the remaining arguments are passed to
575             C<create_multiple_parsers()>.
576              
577             The resultant coderef from calling either of the creation
578             methods is then wrapped in a function that calls C<croak()>
579             in event of failure or the C<DateTime> object in event of
580             success.
581              
582             =head1 FINDING IMPLEMENTATIONS
583              
584             C<Parser> automatically loads any parser classes in C<@INC>.
585              
586             To be loaded automatically, you must be a
587             C<DateTime::Format::Builder::Parser::XXX> module.
588              
589             To be invisible, and not loaded, start your class with a lower class
590             letter. These are ignored.
591              
592             =head1 WRITING A PARSER IMPLEMENTATION
593              
594             =head2 Naming your parser
595              
596             Create a module and name it in the form
597             C<DateTime::Format::Builder::Parser::XXX>
598             where I<XXX> is whatever you like,
599             so long as it doesn't start with a
600             lower case letter.
601              
602             Alternatively, call it something completely different
603             if you don't mind the users explicitly loading your module.
604              
605             I'd recommend keeping within the C<DateTime::Format::Builder>
606             namespace though --- at the time of writing I've not given
607             thought to what non-auto loaded ones should be called. Any
608             ideas, please email me.
609              
610             =head2 Declaring specification arguments
611              
612             Call C<<DateTime::Format::Builder::Parser->valid_params()>> with
613             C<Params::Validate> style arguments. For example:
614              
615             DateTime::Format::Builder::Parser->valid_params(
616             params => { type => ARRAYREF },
617             Regex => { type => SCALARREF, callbacks => {
618             'is a regex' => sub { ref(shift) eq 'Regexp' }
619             }}
620             );
621              
622             Start one of the key names with a capital letter. Ideally that key
623             should match the I<XXX> from earlier. This will be used to help
624             identify which module a parser specification should be given to.
625              
626             The key names I<on_match>, I<on_fail>, I<postprocess>, I<preprocess>,
627             I<label> and I<length> are predefined. You are recommended to make use
628             of them. You may ignore I<length> as C<sort_parsers> takes care of that.
629              
630             =head2 Define create_parser
631              
632             A class method of the name C<create_parser> that does the following:
633              
634             Its arguments are as for a normal method (i.e. class as first argument).
635             The other arguments are the result from a call to C<Params::Validate>
636             according to your specification (the C<valid_params> earlier), i.e. a
637             hash of argument name and value.
638              
639             The return value should be a coderef that takes a date string as its
640             first argument and returns either a C<DateTime> object or C<undef>.
641              
642             =head2 Callbacks
643              
644             It is preferred that you support some callbacks to your parsers.
645             In particular, C<preprocess>, C<on_match>, C<on_fail> and
646             C<postprocess>. See the L<main Builder|DateTime::Format::Builder>
647             docs for the appropriate placing of calls to the callbacks.
648              
649             =head1 SEE ALSO
650              
651             C<datetime@perl.org> mailing list.
652              
653             http://datetime.perl.org/
654              
655             L<perl>, L<DateTime>, L<DateTime::Format::Builder>.
656              
657             L<Params::Validate>.
658              
659             L<DateTime::Format::Builder::Parser::generic>,
660             L<DateTime::Format::Builder::Parser::Dispatch>,
661             L<DateTime::Format::Builder::Parser::Quick>,
662             L<DateTime::Format::Builder::Parser::Regex>,
663             L<DateTime::Format::Builder::Parser::Strptime>.
664              
665             =head1 SUPPORT
666              
667             Bugs may be submitted at L<http://rt.cpan.org/Public/Dist/Display.html?Name=DateTime-Format-Builder> or via email to L<bug-datetime-format-builder@rt.cpan.org|mailto:bug-datetime-format-builder@rt.cpan.org>.
668              
669             I am also usually active on IRC as 'autarch' on C<irc://irc.perl.org>.
670              
671             =head1 SOURCE
672              
673             The source code repository for DateTime-Format-Builder can be found at L<https://github.com/houseabsolute/DateTime-Format-Builder>.
674              
675             =head1 AUTHORS
676              
677             =over 4
678              
679             =item *
680              
681             Dave Rolsky <autarch@urth.org>
682              
683             =item *
684              
685             Iain Truskett
686              
687             =back
688              
689             =head1 COPYRIGHT AND LICENSE
690              
691             This software is Copyright (c) 2019 by Dave Rolsky.
692              
693             This is free software, licensed under:
694              
695             The Artistic License 2.0 (GPL Compatible)
696              
697             The full text of the license can be found in the
698             F<LICENSE> file included with this distribution.
699              
700             =cut