File Coverage

blib/lib/Text/Parser.pm
Criterion Covered Total %
statement 174 174 100.0
branch 75 76 98.6
condition 20 21 95.2
subroutine 46 46 100.0
pod 10 11 90.9
total 325 328 99.0


line stmt bran cond sub pod time code
1 30     30   1781568 use warnings;
  30         147  
  30         920  
2 30     30   140 use strict;
  30         50  
  30         609  
3 30     30   131 use feature ':5.14';
  30         51  
  30         4545  
4              
5             package Text::Parser 0.927;
6              
7             # ABSTRACT: Simplifies text parsing. Easily extensible to parse any text format.
8              
9              
10 30     30   10551 use Moose;
  30         9369710  
  30         204  
11 30     30   218376 use MooseX::CoverableModifiers;
  30         149600  
  30         175  
12 30     30   15224 use MooseX::StrictConstructor;
  30         824731  
  30         118  
13 30     30   271049 use namespace::autoclean;
  30         66  
  30         150  
14 30     30   2605 use Moose::Util 'apply_all_roles', 'ensure_all_roles';
  30         71  
  30         248  
15 30     30   10091 use Moose::Util::TypeConstraints;
  30         64  
  30         237  
16 30     30   79853 use String::Util qw(trim ltrim rtrim eqq);
  30         90269  
  30         2342  
17 30     30   12037 use Text::Parser::Errors;
  30         120  
  30         4507  
18 30     30   15837 use Text::Parser::Rule;
  30         125  
  30         2808  
19              
20             enum 'Text::Parser::Types::MultilineType' => [qw(join_next join_last)];
21             enum 'Text::Parser::Types::TrimType' => [qw(l r b n)];
22              
23 30     30   275 no Moose::Util::TypeConstraints;
  30         67  
  30         364  
24 30     30   26613 use FileHandle;
  30         249646  
  30         162  
25 30     30   9373 use Try::Tiny;
  30         74  
  30         8804  
26              
27              
28             sub BUILD {
29 47     47 0 110114     my $self = shift;
30 47 100       2180     ensure_all_roles $self, 'Text::Parser::AutoSplit' if $self->auto_split;
31 47 100       6175     return if not defined $self->multiline_type;
32 11         56     ensure_all_roles $self, 'Text::Parser::Multiline';
33             }
34              
35              
36             has auto_chomp => (
37                 is => 'rw',
38                 isa => 'Bool',
39                 lazy => 1,
40                 default => 0,
41             );
42              
43              
44             has auto_split => (
45                 is => 'rw',
46                 isa => 'Bool',
47                 lazy => 1,
48                 default => 0,
49                 trigger => \&__newval_auto_split,
50             );
51              
52             sub __newval_auto_split {
53 81     81   37092     my ( $self, $newval, $oldval ) = ( shift, shift, shift );
54 81 100       357     ensure_all_roles $self, 'Text::Parser::AutoSplit' if $newval;
55 81 100 100     232973     $self->_clear_all_fields if not $newval and $oldval;
56             }
57              
58              
59             has auto_trim => (
60                 is => 'rw',
61                 isa => 'Text::Parser::Types::TrimType',
62                 lazy => 1,
63                 default => 'n',
64             );
65              
66              
67             has FS => (
68                 is => 'rw',
69                 isa => 'RegexpRef',
70                 lazy => 1,
71                 default => sub {qr/\s+/},
72             );
73              
74              
75             has multiline_type => (
76                 is => 'rw',
77                 isa => 'Text::Parser::Types::MultilineType|Undef',
78                 lazy => 1,
79                 default => undef,
80             );
81              
82             around multiline_type => sub {
83 1209     1209   62969     my ( $orig, $self ) = ( shift, shift );
84 1209         30242     my $oldval = $orig->($self);
85 1209 100 100     7122     return $oldval if not @_ or eqq( $_[0], $oldval );
86 8         115     return __newval_multi_line( $orig, $self, @_ );
87             };
88              
89             sub __newval_multi_line {
90 8     8   256     my ( $orig, $self, $newval ) = ( shift, shift, shift );
91 8 100       64     ensure_all_roles( $self, 'Text::Parser::Multiline' )
92                     if defined $newval;
93 8         32666     return $orig->( $self, $newval );
94             }
95              
96              
97             has _obj_rules => (
98                 is => 'rw',
99                 isa => 'ArrayRef[Text::Parser::Rule]',
100                 lazy => 1,
101                 default => sub { [] },
102                 traits => ['Array'],
103                 handles => {
104                     _push_rule => 'push',
105                     _has_no_rules => 'is_empty',
106                     _get_rules => 'elements',
107                 },
108             );
109              
110             sub add_rule {
111 18     18 1 118     my $self = shift;
112 18 100       406     $self->auto_split(1) if not $self->auto_split;
113 18         537     my $rule = Text::Parser::Rule->new(@_);
114 18         549     $self->_push_rule($rule);
115             }
116              
117              
118             sub clear_rules {
119 2     2 1 5     my $self = shift;
120 2         64     $self->_obj_rules( [] );
121 2         59     $self->_clear_begin_rule;
122 2         59     $self->_clear_end_rule;
123             }
124              
125              
126             has _begin_rule => (
127                 is => 'rw',
128                 isa => 'Text::Parser::Rule',
129                 predicate => '_has_begin_rule',
130                 clearer => '_clear_begin_rule',
131             );
132              
133             sub BEGIN_rule {
134 4     4 1 22     my $self = shift;
135 4 100       101     $self->auto_split(1) if not $self->auto_split;
136 4         14     my (%opt) = _defaults_for_begin_end(@_);
137 4         19     $self->_modify_rule( '_begin_rule', %opt );
138             }
139              
140             sub _defaults_for_begin_end {
141 7     7   24     my (%opt) = @_;
142 7 100       24     $opt{dont_record} = 1 if not exists $opt{dont_record};
143 7 100       20     delete $opt{if} if exists $opt{if};
144 7 100       18     delete $opt{continue_to_next} if exists $opt{continue_to_next};
145 7         29     return (%opt);
146             }
147              
148             sub _modify_rule {
149 7     7   21     my ( $self, $func, %opt ) = @_;
150 7         19     my $pred = '_has' . $func;
151 7 100       201     $self->_append_rule_lines( $func, \%opt ) if $self->$pred();
152 7         175     my $rule = Text::Parser::Rule->new(%opt);
153 7         163     $self->$func($rule);
154             }
155              
156             sub _append_rule_lines {
157 3     3   9     my ( $self, $func, $opt ) = ( shift, shift, shift );
158 3         72     my $old = $self->$func();
159 3         64     $opt->{do} = $old->action . $opt->{do};
160             }
161              
162              
163             has _end_rule => (
164                 is => 'rw',
165                 isa => 'Text::Parser::Rule',
166                 predicate => '_has_end_rule',
167                 clearer => '_clear_end_rule',
168             );
169              
170             sub END_rule {
171 3     3 1 21     my $self = shift;
172 3 100       68     $self->auto_split(1) if not $self->auto_split;
173 3         14     my (%opt) = _defaults_for_begin_end(@_);
174 3         14     $self->_modify_rule( '_end_rule', %opt );
175             }
176              
177              
178             sub read {
179 74     74 1 65983     my $self = shift;
180 74 100       379     return if not defined $self->_handle_read_inp(@_);
181 68         446     $self->_run_begin_end_block('_begin_rule');
182 68         336     $self->__read_and_close_filehandle;
183 59         299     $self->_run_begin_end_block('_end_rule');
184 59         1487     $self->_ExAWK_symbol_table( {} );
185             }
186              
187             sub _handle_read_inp {
188 74     74   153     my $self = shift;
189 74 100       282     return $self->filehandle if not @_;
190 72 100 100     858     return if not ref( $_[0] ) and not $_[0];
191 71 100       2143     return $self->filename(@_) if not ref( $_[0] );
192 6         20     return $self->filehandle(@_);
193             }
194              
195             has _ExAWK_symbol_table => (
196                 is => 'rw',
197                 isa => 'HashRef[Any]',
198                 default => sub { {} },
199                 lazy => 1,
200             );
201              
202             sub _run_begin_end_block {
203 127     127   345     my ( $self, $func ) = ( shift, shift );
204 127         324     my $pred = '_has' . $func;
205 127 100       3999     return if not $self->$pred();
206 4         87     my $rule = $self->$func();
207 4         19     $rule->_run( $self, 0 );
208             }
209              
210             sub __read_and_close_filehandle {
211 68     68   134     my $self = shift;
212 68         369     $self->_prep_to_read_file;
213 68         406     $self->__read_file_handle;
214 59 100       2053     $self->_close_filehandles if $self->_has_filename;
215 59         1058     $self->_clear_this_line;
216             }
217              
218             sub _prep_to_read_file {
219 68     68   202     my $self = shift;
220 68         2219     $self->_reset_line_count;
221 68         2369     $self->_empty_records;
222 68         2160     $self->_clear_abort;
223             }
224              
225             sub __read_file_handle {
226 68     68   816     my $self = shift;
227 68         507     my $fh = $self->filehandle();
228 68         1310     while (<$fh>) {
229 517 100       1336         last if not $self->__parse_line($_);
230                 }
231             }
232              
233             sub __parse_line {
234 517     517   1610     my ( $self, $line ) = ( shift, shift );
235 517         16682     $self->_next_line_parsed();
236 517         1631     $line = $self->_def_line_manip($line);
237 517         6590     $self->_set_this_line($line);
238 517         2424     $self->save_record($line);
239 511         13044     return not $self->has_aborted;
240             }
241              
242             sub _def_line_manip {
243 517     517   930     my ( $self, $line ) = ( shift, shift );
244 517 100       9898     chomp $line if $self->auto_chomp;
245 517         1280     return $self->_trim_line($line);
246             }
247              
248             sub _trim_line {
249 517     517   1588     my ( $self, $line ) = ( shift, shift );
250 517 100       9745     return $line if $self->auto_trim eq 'n';
251 69 100       521     return trim($line) if $self->auto_trim eq 'b';
252 8 100       148     return ltrim($line) if $self->auto_trim eq 'l';
253 4         11     return rtrim($line);
254             }
255              
256              
257             has filename => (
258                 is => 'rw',
259                 isa => 'Str|Undef',
260                 lazy => 1,
261                 init_arg => undef,
262                 default => undef,
263                 predicate => '_has_filename',
264                 clearer => '_clear_filename',
265                 trigger => \&_set_filehandle,
266             );
267              
268             sub _set_filehandle {
269 71     71   19560     my $self = shift;
270 71 100       2583     return $self->_clear_filename if not defined $self->filename;
271 70         352     $self->_save_filehandle( $self->__get_valid_fh );
272             }
273              
274             sub __get_valid_fh {
275 70     70   155     my $self = shift;
276 70         313     my $fname = $self->_get_valid_text_filename;
277 70 100       910     return FileHandle->new( $fname, 'r' ) if defined $fname;
278 4         94     $fname = $self->filename;
279 4         100     $self->_clear_filename;
280 4         12     $self->_throw_invalid_file_exception($fname);
281             }
282              
283             # Don't touch: Override this in Text::Parser::AutoUncompress
284             sub _get_valid_text_filename {
285 70     70   135     my $self = shift;
286 70         1636     my $fname = $self->filename;
287 70 100 66     7402     return $fname if -f $fname and -r $fname and -T $fname;
      100        
288 4         15     return;
289             }
290              
291             # Don't touch: Override this in Text::Parser::AutoUncompress
292             sub _throw_invalid_file_exception {
293 4     4   10     my ( $self, $fname ) = ( shift, shift );
294 4 100       44     die invalid_filename( name => $fname ) if not -f $fname;
295 1 50       14     die file_not_readable( name => $fname ) if not -r $fname;
296 1         8     die file_not_plain_text( name => $fname );
297             }
298              
299              
300             has filehandle => (
301                 is => 'rw',
302                 isa => 'FileHandle|Undef',
303                 lazy => 1,
304                 init_arg => undef,
305                 default => undef,
306                 predicate => '_has_filehandle',
307                 writer => '_save_filehandle',
308                 reader => '_get_filehandle',
309                 clearer => '_close_filehandles',
310             );
311              
312             sub filehandle {
313 84     84 1 3548     my $self = shift;
314 84 100 100     3013     return if not @_ and not $self->_has_filehandle;
315 81 100       624     $self->_save_filehandle(@_) if @_;
316 77 100       393     $self->_clear_filename if @_;
317 77         2472     return $self->_get_filehandle;
318             }
319              
320              
321             has lines_parsed => (
322                 is => 'ro',
323                 isa => 'Int',
324                 lazy => 1,
325                 init_arg => undef,
326                 default => 0,
327                 traits => ['Counter'],
328                 handles => {
329                     _next_line_parsed => 'inc',
330                     _reset_line_count => 'reset',
331                 }
332             );
333              
334              
335             sub save_record {
336 313     313 1 7349     my ( $self, $record ) = ( shift, shift );
337 313 100       9682     $self->_has_no_rules
338                     ? $self->push_records($record)
339                     : $self->_run_through_rules;
340             }
341              
342             sub _run_through_rules {
343 144     144   190     my $self = shift;
344 144         3901     foreach my $rule ( $self->_get_rules ) {
345 277 100       655         next if not $rule->_test($self);
346 97         279         $rule->_run( $self, 0 );
347 97 100       2250         last if not $rule->continue_to_next;
348                 }
349             }
350              
351              
352             has _current_line => (
353                 is => 'ro',
354                 isa => 'Str|Undef',
355                 init_arg => undef,
356                 writer => '_set_this_line',
357                 reader => 'this_line',
358                 clearer => '_clear_this_line',
359                 default => undef,
360             );
361              
362              
363              
364             has abort => (
365                 is => 'rw',
366                 isa => 'Bool',
367                 lazy => 1,
368                 default => 0,
369                 traits => ['Bool'],
370                 reader => 'has_aborted',
371                 handles => {
372                     abort_reading => 'set',
373                     _clear_abort => 'unset'
374                 },
375             );
376              
377              
378             has records => (
379                 isa => 'ArrayRef[Any]',
380                 is => 'ro',
381                 lazy => 1,
382                 default => sub { return []; },
383                 auto_deref => 1,
384                 init_arg => undef,
385                 traits => ['Array'],
386                 handles => {
387                     get_records => 'elements',
388                     push_records => 'push',
389                     pop_record => 'pop',
390                     _empty_records => 'clear',
391                     _num_records => 'count',
392                     _access_record => 'accessor',
393                 },
394             );
395              
396              
397             sub last_record {
398 16     16 1 608     my $self = shift;
399 16         567     my $count = $self->_num_records();
400 16 100       43     return if not $count;
401 15         766     return $self->_access_record( $count - 1 );
402             }
403              
404              
405             sub is_line_continued {
406 71     71 1 139     my $self = shift;
407 71 100       136     return 0 if not defined $self->multiline_type;
408 65 100 100     136     return 0
409                     if $self->multiline_type eq 'join_last'
410                     and $self->lines_parsed() == 1;
411 60         188     return 1;
412             }
413              
414              
415             sub join_last_line {
416 57     57 1 77     my $self = shift;
417 57         107     my ( $last, $line ) = ( shift, shift );
418 57         428     return $last . $line;
419             }
420              
421              
422             __PACKAGE__->meta->make_immutable;
423              
424 30     30   63599 no Moose;
  30         73  
  30         231  
425              
426             1;
427              
428             __END__
429            
430             =pod
431            
432             =encoding UTF-8
433            
434             =head1 NAME
435            
436             Text::Parser - Simplifies text parsing. Easily extensible to parse any text format.
437            
438             =head1 VERSION
439            
440             version 0.927
441            
442             =head1 SYNOPSIS
443            
444             use Text::Parser;
445            
446             my $parser = Text::Parser->new();
447             $parser->read(shift);
448             print $parser->get_records, "\n";
449            
450             The above code prints the content of the file (named in the first argument) to C<STDOUT>.
451            
452             my $parser = Text::Parser->new();
453             $parser->add_rule(do => 'print');
454             $parser->read(shift);
455            
456             This example also does the same as the earlier one. For more complex examples see the L<manual|Text::Parser::Manual>.
457            
458             =head1 OVERVIEW
459            
460             The L<need|Text::Parser::Manual/MOTIVATION> for this class stems from the fact that text parsing is the most common thing that programmers do, and yet there is no lean, simple way to do it efficiently. Most programmers still write boilerplate code with a C<while> loop.
461            
462             Instead C<Text::Parser> allows programmers to parse text with terse, self-explanatory L<rules|Text::Parser::Manual::ExtendedAWKSyntax>, whose structure is very similar to L<AWK|https://books.google.com/books/about/The_AWK_Programming_Language.html?id=53ueQgAACAAJ>, but extends beyond the capability of AWK. Incidentally, AWK is L<one of the ancestors of Perl|http://history.perl.org/PerlTimeline.html>! One would have expected Perl to extend the capabilities of AWK, although that's not really the case. Command-line C<perl -lane> or even C<perl -lan script.pl> are L<very limited|Text::Parser::Manual::ComparingWithNativePerl> in what they can do. Programmers cannot use them for serious projects. And parsing text files in regular Perl involves writing the same C<while> loop again. L<This website|https://perl-begin.org/uses/text-parsing/> summarizes the options available in Perl so far.
463            
464             With C<Text::Parser>, a developer can focus on specifying a grammar and then simply C<read> the file. The C<L<read|/read>> method automatically runs each rule collecting records from the text input into an array internally. And finally C<L<get_records|/get_records>> can retrieve the records. Thus the programmer now has the power of Perl to create complex data structures, along with the elegance of AWK to parse text files. The L<manuals|Text::Parser::Manual> illustrate this with L<examples|Text::Parser::Manual::ComparingWithNativePerl>.
465            
466             =head1 CONSTRUCTOR
467            
468             =head2 new
469            
470             Takes optional attributes as in example below. See section L<ATTRIBUTES|/ATTRIBUTES> for a list of the attributes and their description.
471            
472             my $parser = Text::Parser->new(
473             auto_chomp => 0,
474             multiline_type => 'join_last',
475             auto_trim => 'b',
476             auto_split => 1,
477             FS => qr/\s+/,
478             );
479            
480             =head1 ATTRIBUTES
481            
482             The attributes below can be used as options to the C<new> constructor. Each attribute has an accessor with the same name.
483            
484             =head2 auto_chomp
485            
486             Read-write attribute. Takes a boolean value as parameter. Defaults to C<0>.
487            
488             print "Parser will chomp lines automatically\n" if $parser->auto_chomp;
489            
490             =head2 auto_split
491            
492             Read-write boolean attribute. Defaults to C<0> (false). Indicates if the parser will automatically split every line into fields.
493            
494             If it is set to a true value, each line will be split into fields, and a set of methods (a quick list L<here|/"Other methods available on auto_split">) become accessible within the C<L<save_record|/save_record>> method. These methods are documented in L<Text::Parser::AutoSplit>.
495            
496             =head2 auto_trim
497            
498             Read-write attribute. The values this can take are shown under the C<L<new|/new>> constructor also. Defaults to C<'n'> (neither side spaces will be trimmed).
499            
500             $parser->auto_trim('l'); # 'l' (left), 'r' (right), 'b' (both), 'n' (neither) (Default)
501            
502             =head2 FS
503            
504             Read-write attribute that can be used to specify the field separator to be used by the C<auto_split> feature. It must be a regular expression reference enclosed in the C<qr> function, like C<qr/\s+|[,]/> which will split across either spaces or commas. The default value for this argument is C<qr/\s+/>.
505            
506             The name for this attribute comes from the built-in C<FS> variable in the popular L<GNU Awk program|https://www.gnu.org/software/gawk/gawk.html>.
507            
508             $parser->FS( qr/\s+\(*|\s*\)/ );
509            
510             C<FS> I<can> be changed in your implementation of C<save_record>. But the changes would take effect only on the next line.
511            
512             =head2 multiline_type
513            
514             If the target text format allows line-wrapping with a continuation character, the C<multiline_type> option tells the parser to join them into a single line. When setting this attribute, one must re-define L<two more methods|/"PARSING LINE-WRAPPED FILES">.
515            
516             By default, the read-write C<multiline_type> attribute has a value of C<undef>, i.e., the target text format will not have wrapped lines. It can be set to either C<'join_next'> or C<'join_last'>.
517            
518             $parser->multiline_type(undef);
519             $parser->multiline_type('join_next');
520            
521             my $mult = $parser->multiline_type;
522             print "Parser is a multi-line parser of type: $mult" if defined $mult;
523            
524             =over 4
525            
526             =item *
527            
528             If the target format allows line-wrapping I<to the B<next>> line, set C<multiline_type> to C<join_next>.
529            
530             =item *
531            
532             If the target format allows line-wrapping I<from the B<last>> line, set C<multiline_type> to C<join_last>.
533            
534             =item *
535            
536             To "slurp" a file into a single string, set C<multiline_type> to C<join_last>. In this special case, you don't need to re-define the C<L<is_line_continued|/is_line_continued>> and C<L<join_last_line|/join_last_line>> methods.
537            
538             =back
539            
540             =head1 METHODS
541            
542             These are meant to be called from the C<::main> program or within subclasses. In general, don't override them - just use them.
543            
544             =head2 add_rule
545            
546             Takes a hash as input. The keys of this hash must be the attributes of the L<Text::Parser::Rule> class constructor and the values should also meet the requirements of that constructor.
547            
548             $parser->add_rule(do => '', dont_record => 1); # Empty rule: does nothing
549             $parser->add_rule(if => 'm/li/', do => 'print', dont_record => 1); # Prints lines with 'li'
550             $parser->add_rule( do => 'uc($3)' ); # Saves records of upper-cased third elements
551            
552             Calling this method without any arguments will throw an exception. The method internally sets the C<auto_split> attribute.
553            
554             =head2 clear_rules
555            
556             Takes no arguments, returns nothing. Clears the rules that were added to the object.
557            
558             $parser->clear_rules;
559            
560             This is useful to be able to re-use the parser after a C<read> call, to parse another text with another set of rules. The C<clear_rules> method does clear even the rules set up by C<L<BEGIN_rule|/BEGIN_rule>> and C<L<END_rule|/END_rule>>.
561            
562             =head2 BEGIN_rule
563            
564             Takes a hash input like C<add_rule>, but C<if> and C<continue_to_next> keys will be ignored.
565            
566             $parser->BEGIN_rule(do => '~count = 0;');
567            
568             =over 4
569            
570             =item *
571            
572             Since any C<if> key is ignored, the C<do> key is always C<eval>uated. Multiple calls to C<BEGIN_rule> will append to the previous calls; meaning, the actions of previous calls will be included.
573            
574             =item *
575            
576             The C<BEGIN> block is mainly used to initialize some variables. So by default C<dont_record> is set true. User I<can> change this and set C<dont_record> as false, thus forcing a record to be saved.
577            
578             =back
579            
580             =head2 END_rule
581            
582             Takes a hash input like C<add_rule>, but C<if> and C<continue_to_next> keys will be ignored. Similar to C<BEGIN_rule>, but the actions in the C<END_rule> will be executed at the end of the C<read> method.
583            
584             $parser->END_rule(do => 'print ~count, "\n";');
585            
586             =over 4
587            
588             =item *
589            
590             Since any C<if> key is ignored, the C<do> key is always C<eval>uated. Multiple calls to C<END_rule> will append to the previous calls; meaning, the actions of previous calls will be included.
591            
592             =item *
593            
594             The C<END> block is mainly used to do final processing of collected records. So by default C<dont_record> is set true. User I<can> change this and set C<dont_record> as false, thus forcing a record to be saved.
595            
596             =back
597            
598             =head2 read
599            
600             Takes a single optional argument that can be either a string containing the name of the file, or a filehandle reference (a C<GLOB>) like C<\*STDIN> or an object of the C<L<FileHandle>> class.
601            
602             $parser->read($filename); # Read the file
603             $parser->read(\*STDIN); # Read the filehandle
604            
605             The above could also be done in two steps if the developer so chooses.
606            
607             $parser->filename($filename);
608             $parser->read(); # equiv: $parser->read($filename)
609            
610             $parser->filehandle(\*STDIN);
611             $parser->read(); # equiv: $parser->read(\*STDIN)
612            
613             The method returns once all records have been read, or if an exception is thrown, or if reading has been aborted with the C<L<abort_reading|/abort_reading>> method.
614            
615             Any C<close> operation will be handled (even if any exception is thrown), as long as C<read> is called with a file name parameter - not if you call with a file handle or C<GLOB> parameter.
616            
617             $parser->read('myfile.txt'); # Will close file automatically
618            
619             open MYFH, "<myfile.txt" or die "Can't open file myfile.txt at ";
620             $parser->read(\*MYFH); # Will not close MYFH
621             close MYFH;
622            
623             B<Note:> To extend the class to other text formats, override C<L<save_record|/save_record>>.
624            
625             =head2 filename
626            
627             Takes an optional string argument containing the name of a file. Returns the name of the file that was last opened if any. Returns C<undef> if no file has been opened.
628            
629             print "Last read ", $parser->filename, "\n";
630            
631             The value stored is "persistent" - meaning that the method remembers the last file that was C<L<read|/read>>.
632            
633             $parser->read(shift @ARGV);
634             print $parser->filename(), ":\n",
635             "=" x (length($parser->filename())+1),
636             "\n",
637             $parser->get_records(),
638             "\n";
639            
640             A C<read> call with a filehandle will clear the last file name.
641            
642             $parser->read(\*MYFH);
643             print "Last file name is lost\n" if not defined $parser->filename();
644            
645             =head2 filehandle
646            
647             Takes an optional argument, that is a filehandle C<GLOB> (such as C<\*STDIN>) or an object of the C<FileHandle> class. Returns the filehandle last saved, or C<undef> if none was saved.
648            
649             my $fh = $parser->filehandle();
650            
651             Like C<L<filename|/filename>>, C<filehandle> is also "persistent". Its old value is lost when either C<filename> is set, or C<read> is called with a filename.
652            
653             $parser->read(\*STDOUT);
654             my $lastfh = $parser->filehandle(); # Will return glob of STDOUT
655            
656             =head2 lines_parsed
657            
658             Takes no arguments. Returns the number of lines last parsed. Every call to C<read> causes the value to be auto-reset.
659            
660             print $parser->lines_parsed, " lines were parsed\n";
661            
662             =head2 has_aborted
663            
664             Takes no arguments, returns a boolean to indicate if text reading was aborted in the middle.
665            
666             print "Aborted\n" if $parser->has_aborted();
667            
668             =head2 get_records
669            
670             Takes no arguments. Returns an array containing all the records saved by the parser.
671            
672             foreach my $record ( $parser->get_records ) {
673             $i++;
674             print "Record: $i: ", $record, "\n";
675             }
676            
677             =head2 pop_record
678            
679             Takes no arguments and pops the last saved record.
680            
681             my $last_rec = $parser->pop_record;
682             $uc_last = uc $last_rec;
683             $parser->save_record($uc_last);
684            
685             =head2 last_record
686            
687             Takes no arguments and returns the last saved record. Leaves the saved records untouched.
688            
689             my $last_rec = $parser->last_record;
690            
691             =head1 USE ONLY IN RULES AND SUBCLASS
692            
693             Do NOT override these methods. They are valid only within a subclass, inside the user-implementation of methods described under L<OVERRIDE IN SUBCLASS|/"OVERRIDE IN SUBCLASS">.
694            
695             =head2 this_line
696            
697             Takes no arguments, and returns the current line being parsed. For example:
698            
699             sub save_record {
700             # ...
701             do_something($self->this_line);
702             # ...
703             }
704            
705             =head2 abort_reading
706            
707             Takes no arguments. Returns C<1>. To be used only in the derived class to abort C<read> in the middle.
708            
709             sub save_record {
710             # ...
711             $self->abort_reading if some_condition($self->this_line);
712             # ...
713             }
714            
715             =head2 push_records
716            
717             This is useful if one needs to implement an C<include>-like command in some text format. The example below illustrates this.
718            
719             package OneParser;
720             use Moose;
721             extends 'Text::Parser';
722            
723             sub save_record {
724             # ...
725             # Under some condition:
726             my $parser = AnotherParser->new();
727             $parser->read($some_file);
728             $self->push_records($parser->get_records);
729             # ...
730             }
731            
732             =head2 Other methods available on C<auto_split>
733            
734             When the C<L<auto_split|/auto_split>> attribute is on (or if it is turned on later), the following additional methods become available:
735            
736             =over 4
737            
738             =item *
739            
740             L<NF|Text::Parser::AutoSplit/NF>
741            
742             =item *
743            
744             L<fields|Text::Parser::AutoSplit/fields>
745            
746             =item *
747            
748             L<field|Text::Parser::AutoSplit/field>
749            
750             =item *
751            
752             L<field_range|Text::Parser::AutoSplit/field_range>
753            
754             =item *
755            
756             L<join_range|Text::Parser::AutoSplit/join_range>
757            
758             =item *
759            
760             L<find_field|Text::Parser::AutoSplit/find_field>
761            
762             =item *
763            
764             L<find_field_index|Text::Parser::AutoSplit/find_field_index>
765            
766             =item *
767            
768             L<splice_fields|Text::Parser::AutoSplit/splice_fields>
769            
770             =back
771            
772             =head1 OVERRIDE IN SUBCLASS
773            
774             The following methods should never be called in the C<::main> program. They may be overridden (or re-defined) in a subclass.
775            
776             =head2 save_record
777            
778             This method may be re-defined in a subclass to parse the target text format. The default implementation takes a single argument and stores it as a record. If no arguments are passed, C<undef> is stored as a record. Note that unlike earlier versions of C<Text::Parser> it is not required to override this method in your derived class. You can simply use the rules instead.
779            
780             For a developer re-defining C<save_record>, in addition to C<L<this_line|/"this_line">>, several additional methods become available if the C<auto_split> attribute is set. These methods are described in greater detail in L<Text::Parser::AutoSplit>, and they are accessible only within C<save_record>.
781            
782             B<Note:> Developers may store records in any form - string, array reference, hash reference, complex data structure, or an object of some class. The program that reads these records using C<L<get_records|/get_records>> has to interpret them. So developers should document the records created by their own implementation of C<save_record>.
783            
784             =head2 PARSING LINE-WRAPPED FILES
785            
786             These methods are useful when parsing line-wrapped files, i.e., if the target text format allows wrapping the content of one line into multiple lines. In such cases, you should C<extend> the C<Text::Parser> class and override the following methods.
787            
788             =head3 is_line_continued
789            
790             If the target text format supports line-wrapping, the developer must override and implement this method. Your method should take a string argument and return a boolean indicating if the line is continued or not.
791            
792             There is a default implementation shipped with this class with return values as follows:
793            
794             multiline_type    | Return value
795             ------------------+---------------------------------
796             undef             | 0
797             join_last         | 0 for first line, 1 otherwise
798             join_next         | 1
799            
800             =head3 join_last_line
801            
802             Again, the developer should implement this method. This method should take two strings, join them while removing any continuation characters, and return the result. The default implementation just concatenates two strings and returns the result without removing anything (not even C<chomp>). See L<Text::Parser::Multiline> for more on this.
803            
804             =head1 EXAMPLES
805            
806             You can find example code in L<Text::Parser::Manual::ComparingWithNativePerl>.
807            
808             =head1 THINGS TO BE DONE
809            
810             This package is still a work in progress. Future versions are expected to include features to:
811            
812             =over 4
813            
814             =item *
815            
816             read and parse from a buffer
817            
818             =item *
819            
820             automatically uncompress input
821            
822             =item *
823            
824             I<suggestions welcome ...>
825            
826             =back
827            
828             Contributions and suggestions are welcome and properly acknowledged.
829            
830             =head1 SEE ALSO
831            
832             =over 4
833            
834             =item *
835            
836             L<Text::Parser::Manual> - Read this manual
837            
838             =item *
839            
840             L<The AWK Programming Language|https://books.google.com/books/about/The_AWK_Programming_Language.html?id=53ueQgAACAAJ> - by B<A>ho, B<W>einberger, and B<K>ernighan.
841            
842             =item *
843            
844             L<Text::Parser::Errors> - documentation of the exceptions this class throws
845            
846             =item *
847            
848             L<Text::Parser::Multiline> - how to read line-wrapped text input
849            
850             =back
851            
852             =head1 BUGS
853            
854             Please report any bugs or feature requests on the bugtracker website
855             L<http://github.com/balajirama/Text-Parser/issues>
856            
857             When submitting a bug or request, please include a test-file or a
858             patch to an existing test-file that illustrates the bug or desired
859             feature.
860            
861             =head1 AUTHOR
862            
863             Balaji Ramasubramanian <balajiram@cpan.org>
864            
865             =head1 COPYRIGHT AND LICENSE
866            
867             This software is copyright (c) 2018-2019 by Balaji Ramasubramanian.
868            
869             This is free software; you can redistribute it and/or modify it under
870             the same terms as the Perl 5 programming language system itself.
871            
872             =head1 CONTRIBUTORS
873            
874             =for stopwords H.Merijn Brand - Tux Mohammad S Anwar
875            
876             =over 4
877            
878             =item *
879            
880             H.Merijn Brand - Tux <h.m.brand@xs4all.nl>
881            
882             =item *
883            
884             Mohammad S Anwar <mohammad.anwar@yahoo.com>
885            
886             =back
887            
888             =cut
889