File Coverage

blib/lib/Date/ManipX/Almanac/Lang.pm
Criterion Covered Total %
statement 92 95 96.8
branch 25 34 73.5
condition 9 15 60.0
subroutine 18 19 94.7
pod n/a
total 144 163 88.3


line stmt bran cond sub pod time code
1             package Date::ManipX::Almanac::Lang;
2              
3 3     3   16926 use 5.010;
  3         12  
4              
5 3     3   13 use strict;
  3         5  
  3         52  
6 3     3   13 use warnings;
  3         6  
  3         72  
7              
8 3     3   38 use Carp;
  3         5  
  3         243  
9              
10             our $VERSION = '0.003';
11              
12 3     3   21 use constant REF_ARRAY => ref [];
  3         26  
  3         3019  
13              
14             =begin comment
15              
16             =head2 __new
17              
18             my $dmal = Date::ManipX::Almanac::Lang->__new(
19             sky => \@sky,
20             );
21              
22             This static method instantiates the object. The C argument is
23             required.
24              
25             =end comment
26              
27             =cut
28              
29             sub __new {
30 1     1   4 my ( $class, %arg ) = @_;
31              
32             REF_ARRAY eq ref $arg{sky}
33 1 50       3 or confess 'Bug - sky must be an ARRAY ref';
34              
35             # For seasons in the Southern hemisphere. We rely on all astonomical
36             # bodies having the same station attribute. If there is none we
37             # assume the Northern hemisphere.
38 1         3 my @season = ( 0 .. 3 );
39 1         2 my $sta;
40 1         6 @{ $arg{sky} }
41 1 50 33     2 and $sta = $arg{sky}[0]->get( 'station' )
      33        
42             and ( $sta->geodetic() )[0] < 0
43             and push @season, splice @season, 0, 2;
44              
45             return bless {
46             season => \@season,
47             sky => $arg{sky},
48 1         69 }, $class;
49             }
50              
51             =begin comment
52              
53             =head2 __body_data
54              
55             my $data = $self->__body_data();
56              
57             This method returns the table that drives L<__body_re()|/__body_re>. It
58             B be implemented by the subclass.
59              
60             The return is an array reference. Each element is a reference to an
61             array as returned by L<__body_re()|/__body_re>, except that the first
62             element is the name of the represented class.
63              
64             =end comment
65              
66             =cut
67              
68             =begin comment
69              
70             =head2 __body_re
71              
72             my ( $body_re, $spec_re, $interp_spec ) =
73             $self->__body_re( $body );
74              
75             Given an astronomical body, return one to three pieces of data:
76              
77             =over
78              
79             =item $body_re
80              
81             This is a regular expression that matches the name of the body in the
82             currently-set language. This regular expression is not expected to
83             capture anything.
84              
85             =item $spec_re
86              
87             If defined, this is a regular expression that matches any specific
88             events in the currently-set language that imply this body. This regular
89             expression must capture the event name in C<< (? ... ) >>. If
90             the event name captured in C<< (? ... ) >> does not imply an
91             event detail, that must be captured in C<< (? ... ) >>.
92              
93             =item $interp_spec
94              
95             If defined, this is a reference to a hash that converts the captured
96             specific events into internal form. This B be defined if
97             C<$spec_re> is.
98              
99             The keys are the strings captured by C<< (? ... ) >>,
100             normalized by converting to lower case, stripping diacriticals, and
101             removing white space.
102              
103             The values are array references.
104              
105             The first element of the array must be the internal name of the event.
106              
107             The second element depends on whether the regular expression captured
108             C<< (? ... ) >>. If it did, the second element is a reference to
109             a hash keyed on the normalized capture, and whose values are detail
110             specifications. If not, the second element is a detail specification.
111              
112             The detail specification is an L
113             event detail number.
114              
115             The third element of the array depends on whether the regular expression
116             captured C<< (? ... ) >>. If it did, the third element is a
117             reference to a hash keyed on the normalized capture, and whose values
118             are qualifier specifications. If not, the third element is ignored, and
119             need not be specified.
120              
121             As of this writing, the C<< (? ... ) >> capture is used only by
122             twilight events, to translate the local equivalents of C<'civil'>,
123             C<'nautical'>, and C<'astronomical'>.
124              
125             =back
126              
127             =end comment
128              
129             =cut
130              
131             sub __body_re {
132 3     3   6 my ( $self, $body ) = @_;
133              
134 3   66     5 foreach ( @{ $self->{body_data} ||= $self->__body_data() } ) {
  3         9  
135             $body->isa( $_->[0] )
136 12 100       49 and return @{ $_ }[ 1 .. $#$_ ];
  2         10  
137             }
138              
139 1         5 my $name = $self->__body_re_from_name(
140             $self->__string_to_re( body_name => $body->get( 'name' ) ) );
141 1         12 return qr/ $name /smxi;
142             }
143              
144             =begin comment
145              
146             =head2 __body_re_from_name
147              
148             my $re = $self->__body_re_from_name( $name );
149              
150             This is a fallback for anything not covered in
151             L<__body_data()|/__body_data> (e.g. stars). The argument is the name,
152             which has already been processed by
153             L<__string_to_re()|/__string_to_re()>. Despite the name of the method,
154             the return is a string, not a C object. The return must not
155             capture anything.
156              
157             This class' implementation simply returns the argument, but it can be
158             overridden to allow things like articles and prepositions. In langages
159             that inflect by gender, the override will need to allow both genders.
160              
161             =end comment
162              
163             =cut
164              
165             sub __body_re_from_name {
166 1     1   2 my ( undef, $name ) = @_;
167 1         2 return $name;
168             }
169              
170             =begin comment
171              
172             =head2 __event_capture
173              
174             $self->__event_capture( $interp_spec )
175              
176             This method is to be called when an event is captured. It is expected to
177             be called in the replacement side of an C operation. The
178             argument is the same C<$interp_spec> that is
179             returned by L<__body_re()|/__body_re>. The captures are interpreted and
180             stashed in the object for later recovery by
181             L<__event_retrieve()|/__event_retrieve>. The empty string is returned.
182              
183             =end comment
184              
185             =cut
186              
187             sub __event_capture {
188 14     14   31 my ( $self, undef, $interp_specific ) = @_;
189 14         32 delete $self->{captured_event};
190              
191 1     1   1209 my %capture = map { $_ => $self->__string_to_key( $_ => $+{$_} ) }
  1         1146  
  1         947  
  21         61  
192 14         30 grep { defined $+{$_} } qw{ specific general detail qual };
  56         247  
193              
194 14         33 foreach (
195             [ specific => $interp_specific ],
196             [ general => @{ $self->{interp_general} ||=
197 14   66     52 $self->__general_event_interp() } ],
198             ) {
199              
200 21         32 my ( $name, $interp_event ) = @{ $_ };
  21         31  
201              
202 21 100 100     76 next unless $interp_event && $capture{$name};
203              
204 14 50       38 my $evt = $interp_event->{$capture{$name}}
205             or confess "Bug - Event '$capture{$name}' not recognized";
206 14         19 ( $evt, my $det, my $qual ) = @{ $evt };
  14         28  
207              
208 14 100       27 if ( defined $capture{detail} ) {
209             defined( $det = $det->{$capture{detail}} )
210 6 50       18 or confess "Bug - $capture{$name} detail ",
211             "'$capture{detail}' not recognized";
212             }
213              
214 14 100       30 if ( defined $capture{qual} ) {
215 1 50       3 $qual
216             or confess "Bug - $capture{$name} qualifiers not defined";
217             defined( $qual = $qual->{$capture{qual}} )
218 1 50       5 or confess "Bug - $capture{$name} qualifier ",
219             "'$capture{qual}' not recognized";
220             }
221              
222 14         32 $self->{captured_event} = [ $evt, $det, $qual ];
223              
224 14         74 return '';
225             }
226 0         0 confess 'Bug - No event captured';
227             }
228              
229             =begin comment
230              
231             =head2 __event_retrieve
232              
233             my ( $body, $event, $detail ) = $dmal->__event_retrieve()
234              
235             This method retrieves the body and event found in the string being
236             parsed. If none was found it returns the empty list.
237              
238             =end comment
239              
240             =cut
241              
242             sub __event_retrieve {
243 14     14   23 my ( $self ) = @_;
244 14 50       15 return @{ $self->{captured_event} || [] };
  14         88  
245             }
246              
247             =begin comment
248              
249             =head2 __general_event_interp
250              
251             my ( $interp_general ) = @{
252             $self->_general_event_interp() };
253              
254             This method returns a reference to an array containing the hashes that
255             interpret the data captured by C<$general_re> (returned by
256             L<__general_event_re()|/__general_event_re>, below). It B be
257             implemented by the subclass.
258              
259             The return corresponds in structure and use to C<$interp_spec> as
260             described above in the documentation to L<__body_re()|/__body_re>,
261             except that the hash returned in C<$interp_general> is keyed by
262             C<< $+ >>.
263              
264             =head2 __general_event_re
265              
266             my $general_re = $self->__general_event_re();
267              
268             This method returns the regular expression that matches general events
269             in the currently-set language. It B be implemented by the
270             subclass.
271              
272             This regular expression B capture the event name in
273             C<< (? ... ) >>. If the event name captured in
274             C<< (? ... ) >> does not imply an event detail, that B be
275             captured in C<< (? ... ) >>.
276              
277             =end comment
278              
279             =cut
280              
281             =begin comment
282              
283             =head2 __ignore_after_re
284              
285             my $ignore_after_re = $self->__ignore_after_re()
286              
287             This method returns a language-specific regular expression which is
288             removed if it appears after the language-specific almanac event
289             specification, but is otherwise ignored. The idea is that you can use
290             this to allow the language-specific version of
291              
292             summer solstice on or after july 4 2020
293              
294             where the C<'on or after'> was matched by the returned expression.
295              
296             This implementation returns an expression that matches nothing.
297              
298             =end comment
299              
300             =cut
301              
302             sub __ignore_after_re {
303 0     0   0 return qr< .{0} >smx;
304             }
305              
306             =begin comment
307              
308             =head2 __ignore_before_re
309              
310             my $ignore_before_re = $self->__ignore_before_re()
311              
312             This method returns a language-specific regular expression which is
313             removed if it appears before the language-specific almanac event
314             specification, but is otherwise ignored. It is intended to be analogous
315             to C<__ignore_after_re()>, but at the moment I know of no use for it,
316             and have provided it simply for orthogonality's sake.
317              
318             This implementation returns an expression that matches nothing.
319              
320             =end comment
321              
322             =cut
323              
324             sub __ignore_before_re {
325 1     1   3 return qr< .{0} >smx;
326             }
327              
328             =begin comment
329              
330             =head2 __midnight
331              
332             say 'Midnight is ', $dmal->__midnight();
333              
334             This method returns the representation of midnight in the current
335             language. This may be substituted into the string given to
336             L to parse.
337              
338             This class provides a default implementation that returns C<'00:00:00'>,
339             which ought to be pretty portable among languages, but individual
340             subclasses are free to override this as necessary.
341              
342             =end comment
343              
344             =cut
345              
346             sub __midnight {
347 2     2   4 return '00:00:00';
348             }
349              
350             =begin comment
351              
352             =head2 __normalize_capture
353              
354             my $normalized_capture = $dmad->__normalize_capture( $capture_name, $capture_content );
355              
356             This method is called by L<__event_capture()|/__event_capture> when a
357             string is captured. The arguments are the capture name and its
358             contents. The return is the contents normalized appropriately to the
359             language.
360              
361             By default, it simply returns the capture buffer converted to lower
362             case, but individual languages can override it to do things like strip
363             diacriticals.
364              
365             =end comment
366              
367             =cut
368              
369 22     22   48 sub __normalize_capture { return lc $_[2] }
370              
371             =begin comment
372              
373             =head2 __parse_pre
374              
375             my ( $modified_string, $body, @event ) =
376             $dmal->__parse_pre( $string );
377              
378             This method is intended to be called by a parse method before handing
379             off the parse to the superclass. If an almanac event is recognized, it
380             returns the a modification of the original string, the astronomical
381             body, and the event name and detail. If no almanac event is recognized,
382             only the originial string is returned.
383              
384             The modified string has been processed by being converted to lower case,
385             having diacriticals stripped (heavy lifting done by
386             L), and the event
387             replaced by the language equivalent of midnight.
388              
389             The event name and detail are those expected by
390             L (q.v.).
391              
392             =end comment
393              
394             =cut
395              
396             sub __parse_pre {
397 14     14   25 my ( $self, $string ) = @_;
398             wantarray
399 14 50       28 or confess 'Bug - must call in list context';
400              
401 14 100       29 unless ( $self->{_sky} ) {
402 1         2 $self->{_sky} = \my @res;
403 1         3 my $general_re = $self->__general_event_re();
404 1         2 my $ignore_after_re = $self->__ignore_after_re();
405 1         5 my $ignore_before_re = $self->__ignore_before_re();
406 1         1 foreach my $body ( @{ $self->{sky} } ) {
  1         4  
407 3 50       9 my ( $body_re, $specific_re, $interp_specific ) =
408             $self->__body_re( $body )
409             or confess 'Bug - no re computed for ', $body->get( 'name' );
410 3 100       525 my $re = $specific_re ? qr/ \b $ignore_before_re? (?:
411             $specific_re |
412             (?: $body_re \s* $general_re ) |
413             (?: $general_re \s* $body_re )
414             ) $ignore_after_re? \b /smxi
415             : qr/ \b $ignore_before_re? (?: (?: $body_re \s* $general_re ) |
416             (?: $general_re \s* $body_re ) ) $ignore_after_re? \b /smxi;
417 3         20 push @res, [ $re, $body, $interp_specific ];
418             }
419             }
420              
421 14         98 ( my $match = $string ) =~ s/ \s+ / /smxg;
422              
423 14         23 foreach ( @{ $self->{_sky} } ) {
  14         33  
424 21         31 my ( $re, $body, @se ) = @{ $_ };
  21         42  
425 21 100       2725 $match =~ s/ $re / $self->__event_capture( $body, @se ) /smxie
  14         52  
426             or next;
427             # We may have captured the entire string. If so, we want today
428             # at midnight. 'Today' is out because it is language-specific.
429             # Midnight is considerably less so, and implies today.
430 14 100       65 $match =~ m/ \S /smx
431             or $match = $self->__midnight();
432 14         50 return ( $match, $body, $self->__event_retrieve() );
433             }
434              
435 0         0 return ( $string );
436             }
437              
438             =begin comment
439              
440             =head2 __season_to_detail
441              
442             my @detail = $self->__season_to_detail();
443              
444             This method returns an array that maps season number (Spring = 0 through
445             Winter = 3) to an L seasonal event
446             detail number. This is C<( 0 .. 3 )> in the Northern hemisphere, but
447             C<( 2, 3, 0, 1 )> in the Southern hemisphere.
448              
449             With no justification but computational convenience, the Equator is
450             considered to be in the Northern hemisphere.
451              
452             =end comment
453              
454             =cut
455              
456             sub __season_to_detail {
457 1     1   2 my ( $self ) = @_;
458 1         2 return @{ $self->{season} };
  1         4  
459             }
460              
461             =begin comment
462              
463             my $key = $dmal->__string_to_key( $capture_name, $string );
464              
465             This method converts a string to an equivalent (for our purposes)
466             hash key.
467              
468             The idea is to mimic (probably badly) something like
469             L level C<1>, but in a regular
470             expression. This is done by:
471              
472             =over
473              
474             =item 1) Calling __normalize_capture()
475              
476             =item 2) Removing everything that is not a word character
477              
478             =back
479              
480             =end comment
481              
482             =cut
483              
484             sub __string_to_key {
485 21     21   63 my ( $self, $name, $string ) = @_;
486 21         43 my $key = $self->__normalize_capture( $name, $string );
487 21         33 $key =~ s/ \W+ //smxg;
488 21         72 return $key;
489             }
490              
491             =begin comment
492              
493             my $re_string = $dmal->__string_to_re( $name, $string );
494              
495             This method converts a string to an equivalent (for our purposes)
496             regular expression. The return is a string, not a C object,
497             though that is subject to change.
498              
499             The C<$name> object says what the string is used for, and is simply
500             passed through to L<__normalize_capture()|/__normalize_capture>, which
501             does most of the work.
502              
503             The idea is to mimic (probably badly) something like
504             L level C<1>, but in a regular
505             expression. This is done by:
506              
507             =over
508              
509             =item 1) Calling __normalize_capture()
510              
511             =item 2) Removing everything that is not a word character or a space
512              
513             =item 3) Converting spaces to C<\s*>
514              
515             =back
516              
517             =end comment
518              
519             =cut
520              
521             sub __string_to_re {
522 1     1   18 my ( $self, $name, $string ) = @_;
523 1         4 my $re = $self->__normalize_capture( $name, $string );
524 1         4 $re =~ s/ [^\w\s]+ //smxg;
525 1         2 $re =~ s/ \s+ /\\s*/smxg;
526 1         5 return $re;
527             }
528              
529             1;
530              
531             __END__