File Coverage

blib/lib/Text/Parser.pm
Criterion Covered Total %
statement 183 183 100.0
branch 77 78 98.7
condition 20 21 95.2
subroutine 50 50 100.0
pod 10 11 90.9
total 340 343 99.1


line stmt bran cond sub pod time code
1 30     30   1893039 use warnings;
  30         150  
  30         937  
2 30     30   149 use strict;
  30         55  
  30         604  
3 30     30   143 use feature ':5.14';
  30         49  
  30         4660  
4              
5             package Text::Parser 0.926;
6              
7             # ABSTRACT: Simplifies text parsing. Easily extensible to parse any text format.
8              
9              
10 30     30   11107 use Moose;
  30         9901451  
  30         229  
11 30     30   230820 use MooseX::CoverableModifiers;
  30         157775  
  30         191  
12 30     30   16343 use MooseX::StrictConstructor;
  30         860686  
  30         122  
13 30     30   285444 use namespace::autoclean;
  30         75  
  30         158  
14 30     30   2755 use Moose::Util 'apply_all_roles', 'ensure_all_roles';
  30         82  
  30         265  
15 30     30   10246 use Moose::Util::TypeConstraints;
  30         62  
  30         245  
16 30     30   82168 use String::Util qw(trim ltrim rtrim eqq);
  30         93116  
  30         2362  
17 30     30   12608 use Text::Parser::Errors;
  30         124  
  30         4200  
18 30     30   15739 use Text::Parser::Rule;
  30         142  
  30         2785  
19              
20             enum 'Text::Parser::Types::MultilineType' => [qw(join_next join_last)];
21             enum 'Text::Parser::Types::TrimType' => [qw(l r b n)];
22              
23 30     30   297 no Moose::Util::TypeConstraints;
  30         65  
  30         318  
24 30     30   26575 use FileHandle;
  30         259625  
  30         177  
25 30     30   10182 use Try::Tiny;
  30         73  
  30         4906  
26              
27              
28             sub BUILD {
29 47     47 0 151813     my $self = shift;
30 47 100       273     ensure_all_roles $self, 'Text::Parser::AutoSplit' if $self->auto_split;
31 47 100       106927     return if not defined $self->multiline_type;
32 11         71     ensure_all_roles $self, 'Text::Parser::Multiline';
33             }
34              
35              
36             has auto_chomp => (
37                 is => 'rw',
38                 isa => 'Bool',
39                 lazy => 1,
40                 default => 0,
41             );
42              
43              
44             has auto_split => (
45                 is => 'rw',
46                 isa => 'Bool',
47                 lazy => 1,
48                 default => 0,
49             );
50              
51             around auto_split => sub {
52 995     995   12768     my ( $orig, $self ) = ( shift, shift );
53 995         2473     __newval_auto_split( $orig, $self, @_ );
54 995         156642     return $orig->($self);
55             };
56              
57             sub __newval_auto_split {
58 995     995   1589     my ( $orig, $self, $newval ) = ( shift, shift, shift );
59 995 100       2171     return if not defined $newval;
60 68 100 100     694     $self->_clear_all_fields if not $newval and $orig->($self);
61 68         1548     $orig->( $self, $newval );
62 68 100       248     ensure_all_roles $self, 'Text::Parser::AutoSplit' if $newval;
63             }
64              
65              
66             has auto_trim => (
67                 is => 'rw',
68                 isa => 'Text::Parser::Types::TrimType',
69                 lazy => 1,
70                 default => 'n',
71             );
72              
73              
74             has FS => (
75                 is => 'rw',
76                 isa => 'RegexpRef',
77                 lazy => 1,
78                 default => sub {qr/\s+/},
79             );
80              
81              
82             has multiline_type => (
83                 is => 'rw',
84                 isa => 'Text::Parser::Types::MultilineType|Undef',
85                 lazy => 1,
86                 default => undef,
87             );
88              
89             around multiline_type => sub {
90 1209     1209   61784     my ( $orig, $self ) = ( shift, shift );
91 1209         27348     my $oldval = $orig->($self);
92 1209 100 100     6129     return $oldval if not @_ or eqq( $_[0], $oldval );
93 8         102     return __newval_multi_line( $orig, $self, @_ );
94             };
95              
96             sub __newval_multi_line {
97 8     8   27     my ( $orig, $self, $newval ) = ( shift, shift, shift );
98 8 100       52     ensure_all_roles( $self, 'Text::Parser::Multiline' )
99                     if defined $newval;
100 8         29361     return $orig->( $self, $newval );
101             }
102              
103              
104             has _obj_rules => (
105                 is => 'rw',
106                 isa => 'ArrayRef[Text::Parser::Rule]',
107                 lazy => 1,
108                 default => sub { [] },
109                 traits => ['Array'],
110                 handles => {
111                     _push_rule => 'push',
112                     _has_no_rules => 'is_empty',
113                     _get_rules => 'elements',
114                 },
115             );
116              
117             sub add_rule {
118 18     18 1 120     my $self = shift;
119 18 100       65     $self->auto_split(1) if not $self->auto_split;
120 18         518     my $rule = Text::Parser::Rule->new(@_);
121 18         574     $self->_push_rule($rule);
122             }
123              
124              
125             sub clear_rules {
126 2     2 1 6     my $self = shift;
127 2         69     $self->_obj_rules( [] );
128 2         65     $self->_clear_begin_rule;
129 2         63     $self->_clear_end_rule;
130             }
131              
132              
133             has _begin_rule => (
134                 is => 'rw',
135                 isa => 'Text::Parser::Rule',
136                 predicate => '_has_begin_rule',
137                 clearer => '_clear_begin_rule',
138             );
139              
140             sub BEGIN_rule {
141 4     4 1 24     my $self = shift;
142 4 100       11     $self->auto_split(1) if not $self->auto_split;
143 4         14     my (%opt) = _defaults_for_begin_end(@_);
144 4         17     $self->_modify_rule( '_begin_rule', %opt );
145             }
146              
147             sub _defaults_for_begin_end {
148 7     7   28     my (%opt) = @_;
149 7 100       25     $opt{dont_record} = 1 if not exists $opt{dont_record};
150 7 100       18     delete $opt{if} if exists $opt{if};
151 7 100       18     delete $opt{continue_to_next} if exists $opt{continue_to_next};
152 7         32     return (%opt);
153             }
154              
155             sub _modify_rule {
156 7     7   23     my ( $self, $func, %opt ) = @_;
157 7         25     my $pred = '_has' . $func;
158 7 100       219     $self->_append_rule_lines( $func, \%opt ) if $self->$pred();
159 7         188     my $rule = Text::Parser::Rule->new(%opt);
160 7         170     $self->$func($rule);
161             }
162              
163             sub _append_rule_lines {
164 3     3   7     my ( $self, $func, $opt ) = ( shift, shift, shift );
165 3         73     my $old = $self->$func();
166 3         61     $opt->{do} = $old->action . $opt->{do};
167             }
168              
169              
170             has _end_rule => (
171                 is => 'rw',
172                 isa => 'Text::Parser::Rule',
173                 predicate => '_has_end_rule',
174                 clearer => '_clear_end_rule',
175             );
176              
177             sub END_rule {
178 3     3 1 23     my $self = shift;
179 3 100       21     $self->auto_split(1) if not $self->auto_split;
180 3         12     my (%opt) = _defaults_for_begin_end(@_);
181 3         15     $self->_modify_rule( '_end_rule', %opt );
182             }
183              
184              
185             sub read {
186 74     74 1 68700     my $self = shift;
187 74 100       317     return if not defined $self->_handle_read_inp(@_);
188 68         365     $self->_run_begin_end_block('_begin_rule');
189 68         410     $self->__read_and_close_filehandle;
190 59         291     $self->_run_begin_end_block('_end_rule');
191 59         1459     $self->_ExAWK_symbol_table( {} );
192             }
193              
194             sub _handle_read_inp {
195 74     74   149     my $self = shift;
196 74 100       242     return $self->filehandle if not @_;
197 72 100 100     440     return if not ref( $_[0] ) and not $_[0];
198 71 100       2117     return $self->filename(@_) if not ref( $_[0] );
199 6         20     return $self->filehandle(@_);
200             }
201              
202             has _ExAWK_symbol_table => (
203                 is => 'rw',
204                 isa => 'HashRef[Any]',
205                 default => sub { {} },
206                 lazy => 1,
207             );
208              
209             sub _run_begin_end_block {
210 127     127   311     my ( $self, $func ) = ( shift, shift );
211 127         310     my $pred = '_has' . $func;
212 127 100       3975     return if not $self->$pred();
213 4         92     my $rule = $self->$func();
214 4         20     $rule->run( $self, 0 );
215             }
216              
217             sub __read_and_close_filehandle {
218 68     68   139     my $self = shift;
219 68         375     $self->_prep_to_read_file;
220 68         431     $self->__read_file_handle;
221 59 100       1952     $self->_close_filehandles if $self->_has_filename;
222 59         972     $self->_clear_this_line;
223             }
224              
225             sub _prep_to_read_file {
226 68     68   143     my $self = shift;
227 68         2735     $self->_reset_line_count;
228 68         2324     $self->_empty_records;
229 68         2180     $self->_clear_abort;
230             }
231              
232             sub __read_file_handle {
233 68     68   747     my $self = shift;
234 68         488     my $fh = $self->filehandle();
235 68         1364     while (<$fh>) {
236 517 100       1303         last if not $self->__parse_line($_);
237                 }
238             }
239              
240             sub __parse_line {
241 517     517   1033     my ( $self, $line ) = ( shift, shift );
242 517         15896     $self->_next_line_parsed();
243 517         1127     $line = $self->_def_line_manip($line);
244 517         2584     $self->__try_to_parse($line);
245 511         19267     return not $self->has_aborted;
246             }
247              
248             sub _def_line_manip {
249 517     517   899     my ( $self, $line ) = ( shift, shift );
250 517 100       9815     chomp $line if $self->auto_chomp;
251 517         1233     return $self->_trim_line($line);
252             }
253              
254             sub _trim_line {
255 517     517   960     my ( $self, $line ) = ( shift, shift );
256 517 100       9695     return $line if $self->auto_trim eq 'n';
257 69 100       493     return trim($line) if $self->auto_trim eq 'b';
258 8 100       147     return ltrim($line) if $self->auto_trim eq 'l';
259 4         11     return rtrim($line);
260             }
261              
262             sub __try_to_parse {
263 517     517   874     my ( $self, $line ) = @_;
264 517         5398     $self->_set_this_line($line);
265 517     517   33972     try { $self->save_record($line); }
266 517     6   3522     catch { die $_; };
  6         4720  
267             }
268              
269              
270             has filename => (
271                 is => 'rw',
272                 isa => 'Str|Undef',
273                 lazy => 1,
274                 init_arg => undef,
275                 default => undef,
276                 predicate => '_has_filename',
277                 clearer => '_clear_filename',
278                 trigger => \&_set_filehandle,
279             );
280              
281             sub _set_filehandle {
282 71     71   13187     my $self = shift;
283 71 100       1855     return $self->_clear_filename if not defined $self->filename;
284 70         347     $self->_save_filehandle( $self->__get_valid_fh );
285             }
286              
287             sub __get_valid_fh {
288 70     70   176     my $self = shift;
289 70         273     my $fname = $self->_get_valid_text_filename;
290 70 100       797     return FileHandle->new( $fname, 'r' ) if defined $fname;
291 4         98     $fname = $self->filename;
292 4         140     $self->_clear_filename;
293 4         15     $self->_throw_invalid_file_exception($fname);
294             }
295              
296             # Don't touch: Override this in Text::Parser::AutoUncompress
297             sub _get_valid_text_filename {
298 70     70   224     my $self = shift;
299 70         1554     my $fname = $self->filename;
300 70 100 66     6731     return $fname if -f $fname and -r $fname and -T $fname;
      100        
301 4         19     return;
302             }
303              
304             # Don't touch: Override this in Text::Parser::AutoUncompress
305             sub _throw_invalid_file_exception {
306 4     4   14     my ( $self, $fname ) = ( shift, shift );
307 4 100       52     die invalid_filename( name => $fname ) if not -f $fname;
308 1 50       15     die file_not_readable( name => $fname ) if not -r $fname;
309 1         9     die file_not_plain_text( name => $fname );
310             }
311              
312              
313             has filehandle => (
314                 is => 'rw',
315                 isa => 'FileHandle|Undef',
316                 lazy => 1,
317                 init_arg => undef,
318                 default => undef,
319                 predicate => '_has_filehandle',
320                 writer => '_save_filehandle',
321                 reader => '_get_filehandle',
322                 clearer => '_close_filehandles',
323             );
324              
325             sub filehandle {
326 84     84 1 1090     my $self = shift;
327 84 100 100     2563     return if not @_ and not $self->_has_filehandle;
328 81 100       563     $self->_save_filehandle(@_) if @_;
329 77 100       382     $self->_clear_filename if @_;
330 77         2719     return $self->_get_filehandle;
331             }
332              
333              
334             has lines_parsed => (
335                 is => 'ro',
336                 isa => 'Int',
337                 lazy => 1,
338                 init_arg => undef,
339                 default => 0,
340                 traits => ['Counter'],
341                 handles => {
342                     _next_line_parsed => 'inc',
343                     _reset_line_count => 'reset',
344                 }
345             );
346              
347              
348             sub save_record {
349 313     313 1 6294     my ( $self, $record ) = ( shift, shift );
350 313 100       10360     $self->_has_no_rules
351                     ? $self->push_records($record)
352                     : $self->_run_through_rules;
353             }
354              
355             sub _run_through_rules {
356 144     144   225     my $self = shift;
357 144         4409     foreach my $rule ( $self->_get_rules ) {
358 277 100       791         next if not $rule->test($self);
359 97         330         $rule->run($self);
360 97 100       2594         last if not $rule->continue_to_next;
361                 }
362             }
363              
364              
365             has _current_line => (
366                 is => 'ro',
367                 isa => 'Str|Undef',
368                 init_arg => undef,
369                 writer => '_set_this_line',
370                 reader => 'this_line',
371                 clearer => '_clear_this_line',
372                 default => undef,
373             );
374              
375              
376              
377             has abort => (
378                 is => 'rw',
379                 isa => 'Bool',
380                 lazy => 1,
381                 default => 0,
382                 traits => ['Bool'],
383                 reader => 'has_aborted',
384                 handles => {
385                     abort_reading => 'set',
386                     _clear_abort => 'unset'
387                 },
388             );
389              
390              
391             has records => (
392                 isa => 'ArrayRef[Any]',
393                 is => 'ro',
394                 lazy => 1,
395                 default => sub { return []; },
396                 auto_deref => 1,
397                 init_arg => undef,
398                 traits => ['Array'],
399                 handles => {
400                     get_records => 'elements',
401                     push_records => 'push',
402                     pop_record => 'pop',
403                     _empty_records => 'clear',
404                     _num_records => 'count',
405                     _access_record => 'accessor',
406                 },
407             );
408              
409              
410             sub last_record {
411 16     16 1 616     my $self = shift;
412 16         591     my $count = $self->_num_records();
413 16 100       48     return if not $count;
414 15         469     return $self->_access_record( $count - 1 );
415             }
416              
417              
418             sub is_line_continued {
419 71     71 1 127     my $self = shift;
420 71 100       141     return 0 if not defined $self->multiline_type;
421 65 100 100     128     return 0
422                     if $self->multiline_type eq 'join_last'
423                     and $self->lines_parsed() == 1;
424 60         181     return 1;
425             }
426              
427              
428             sub join_last_line {
429 57     57 1 95     my $self = shift;
430 57         88     my ( $last, $line ) = ( shift, shift );
431 57         188     return $last . $line;
432             }
433              
434              
435             __PACKAGE__->meta->make_immutable;
436              
437 30     30   79444 no Moose;
  30         105  
  30         277  
438              
439             1;
440              
441             __END__
442            
443             =pod
444            
445             =encoding UTF-8
446            
447             =head1 NAME
448            
449             Text::Parser - Simplifies text parsing. Easily extensible to parse any text format.
450            
451             =head1 VERSION
452            
453             version 0.926
454            
455             =head1 SYNOPSIS
456            
457             use Text::Parser;
458            
459             my $parser = Text::Parser->new();
460             $parser->read(shift);
461             print $parser->get_records, "\n";
462            
463             The above code prints the content of the file (named in the first argument) to C<STDOUT>.
464            
465             my $parser = Text::Parser->new();
466             $parser->add_rule(do => 'print');
467             $parser->read(shift);
468            
469             This example also does the same as the earlier one. For more complex examples see the L<manual|Text::Parser::Manual>.
470            
471             =head1 OVERVIEW
472            
473             The L<need|Text::Parser::Manual/MOTIVATION> for this class stems from the fact that text parsing is the most common thing that programmers do, and yet there is no lean, simple way to do it efficiently. Most programmers still write boilerplate code with a C<while> loop.
474            
475             Instead C<Text::Parser> allows programmers to parse text with terse, self-explanatory L<rules|Text::Parser::Manual::ExtendedAWKSyntax>, whose structure is very similar to L<AWK|https://books.google.com/books/about/The_AWK_Programming_Language.html?id=53ueQgAACAAJ>, but extends beyond the capability of AWK. Incidentally, AWK is L<one of the ancestors of Perl|http://history.perl.org/PerlTimeline.html>! One would have expected Perl to extend the capabilities of AWK, although that's not really the case. Command-line C<perl -lane> or even C<perl -lan script.pl> are L<very limited|Text::Parser::Manual::ComparingWithNativePerl> in what they can do. Programmers cannot use them for serious projects. And parsing text files in regular Perl involves writing the same C<while> loop again. L<This website|https://perl-begin.org/uses/text-parsing/> summarizes the options available in Perl so far.
476            
477             With C<Text::Parser>, a developer can focus on specifying a grammar and then simply C<read> the file. The C<L<read|/read>> method automatically runs each rule collecting records from the text input into an array internally. And finally C<L<get_records|/get_records>> can retrieve the records. Thus the programmer now has the power of Perl to create complex data structures, along with the elegance of AWK to parse text files. The L<manuals|Text::Parser::Manual> illustrate this with L<examples|Text::Parser::Manual::ComparingWithNativePerl>.
478            
479             =head1 CONSTRUCTOR
480            
481             =head2 new
482            
483             Takes optional attributes as in example below. See section L<ATTRIBUTES|/ATTRIBUTES> for a list of the attributes and their description.
484            
485             my $parser = Text::Parser->new(
486             auto_chomp => 0,
487             multiline_type => 'join_last',
488             auto_trim => 'b',
489             auto_split => 1,
490             FS => qr/\s+/,
491             );
492            
493             =head1 ATTRIBUTES
494            
495             The attributes below can be used as options to the C<new> constructor. Each attribute has an accessor with the same name.
496            
497             =head2 auto_chomp
498            
499             Read-write attribute. Takes a boolean value as parameter. Defaults to C<0>.
500            
501             print "Parser will chomp lines automatically\n" if $parser->auto_chomp;
502            
503             =head2 auto_split
504            
505             Read-write boolean attribute. Defaults to C<0> (false). Indicates if the parser will automatically split every line into fields.
506            
507             If it is set to a true value, each line will be split into fields, and a set of methods (a quick list L<here|/"Other methods available on auto_split">) become accessible within the C<L<save_record|/save_record>> method. These methods are documented in L<Text::Parser::AutoSplit>.
508            
509             =head2 auto_trim
510            
511             Read-write attribute. The values this can take are shown under the C<L<new|/new>> constructor also. Defaults to C<'n'> (neither side spaces will be trimmed).
512            
513             $parser->auto_trim('l'); # 'l' (left), 'r' (right), 'b' (both), 'n' (neither) (Default)
514            
515             =head2 FS
516            
517             Read-write attribute that can be used to specify the field separator to be used by the C<auto_split> feature. It must be a regular expression reference enclosed in the C<qr> function, like C<qr/\s+|[,]/> which will split across either spaces or commas. The default value for this argument is C<qr/\s+/>.
518            
519             The name for this attribute comes from the built-in C<FS> variable in the popular L<GNU Awk program|https://www.gnu.org/software/gawk/gawk.html>.
520            
521             $parser->FS( qr/\s+\(*|\s*\)/ );
522            
523             C<FS> I<can> be changed in your implementation of C<save_record>. But the changes would take effect only on the next line.
524            
525             =head2 multiline_type
526            
527             If the target text format allows line-wrapping with a continuation character, the C<multiline_type> option tells the parser to join them into a single line. When setting this attribute, one must re-define L<two more methods|/"PARSING LINE-WRAPPED FILES">.
528            
529             By default, the read-write C<multiline_type> attribute has a value of C<undef>, i.e., the target text format will not have wrapped lines. It can be set to either C<'join_next'> or C<'join_last'>.
530            
531             $parser->multiline_type(undef);
532             $parser->multiline_type('join_next');
533            
534             my $mult = $parser->multiline_type;
535             print "Parser is a multi-line parser of type: $mult" if defined $mult;
536            
537             =over 4
538            
539             =item *
540            
541             If the target format allows line-wrapping I<to the B<next>> line, set C<multiline_type> to C<join_next>.
542            
543             =item *
544            
545             If the target format allows line-wrapping I<from the B<last>> line, set C<multiline_type> to C<join_last>.
546            
547             =item *
548            
549             To "slurp" a file into a single string, set C<multiline_type> to C<join_last>. In this special case, you don't need to re-define the C<L<is_line_continued|/is_line_continued>> and C<L<join_last_line|/join_last_line>> methods.
550            
551             =back
552            
553             =head1 METHODS
554            
555             These are meant to be called from the C<::main> program or within subclasses. In general, don't override them - just use them.
556            
557             =head2 add_rule
558            
559             Takes a hash as input. The keys of this hash must be the attributes of the L<Text::Parser::Rule> class constructor and the values should also meet the requirements of that constructor.
560            
561             $parser->add_rule(do => '', dont_record => 1); # Empty rule: does nothing
562             $parser->add_rule(if => 'm/li/', do => 'print', dont_record => 1); # Prints lines with 'li'
563             $parser->add_rule( do => 'uc($3)' ); # Saves records of upper-cased third elements
564            
565             Calling this method without any arguments will throw an exception. The method internally sets the C<auto_split> attribute.
566            
567             =head2 clear_rules
568            
569             Takes no arguments, returns nothing. Clears the rules that were added to the object.
570            
571             $parser->clear_rules;
572            
573             This is useful to be able to re-use the parser after a C<read> call, to parse another text with another set of rules. The C<clear_rules> method does clear even the rules set up by C<L<BEGIN_rule|/BEGIN_rule>> and C<L<END_rule|/END_rule>>.
574            
575             =head2 BEGIN_rule
576            
577             Takes a hash input like C<add_rule>, but C<if> and C<continue_to_next> keys will be ignored.
578            
579             $parser->BEGIN_rule(do => '~count = 0;');
580            
581             =over 4
582            
583             =item *
584            
585             Since any C<if> key is ignored, the C<do> key is always C<eval>uated. Multiple calls to C<BEGIN_rule> will append to the previous calls; meaning, the actions of previous calls will be included.
586            
587             =item *
588            
589             The C<BEGIN> block is mainly used to initialize some variables. So by default C<dont_record> is set true. User I<can> change this and set C<dont_record> as false, thus forcing a record to be saved.
590            
591             =back
592            
593             =head2 END_rule
594            
595             Takes a hash input like C<add_rule>, but C<if> and C<continue_to_next> keys will be ignored. Similar to C<BEGIN_rule>, but the actions in the C<END_rule> will be executed at the end of the C<read> method.
596            
597             $parser->END_rule(do => 'print ~count, "\n";');
598            
599             =over 4
600            
601             =item *
602            
603             Since any C<if> key is ignored, the C<do> key is always C<eval>uated. Multiple calls to C<END_rule> will append to the previous calls; meaning, the actions of previous calls will be included.
604            
605             =item *
606            
607             The C<END> block is mainly used to do final processing of collected records. So by default C<dont_record> is set true. User I<can> change this and set C<dont_record> as false, thus forcing a record to be saved.
608            
609             =back
610            
611             =head2 read
612            
613             Takes a single optional argument that can be either a string containing the name of the file, or a filehandle reference (a C<GLOB>) like C<\*STDIN> or an object of the C<L<FileHandle>> class.
614            
615             $parser->read($filename); # Read the file
616             $parser->read(\*STDIN); # Read the filehandle
617            
618             The above could also be done in two steps if the developer so chooses.
619            
620             $parser->filename($filename);
621             $parser->read(); # equiv: $parser->read($filename)
622            
623             $parser->filehandle(\*STDIN);
624             $parser->read(); # equiv: $parser->read(\*STDIN)
625            
626             The method returns once all records have been read, or if an exception is thrown, or if reading has been aborted with the C<L<abort_reading|/abort_reading>> method.
627            
628             Any C<close> operation will be handled (even if any exception is thrown), as long as C<read> is called with a file name parameter - not if you call with a file handle or C<GLOB> parameter.
629            
630             $parser->read('myfile.txt'); # Will close file automatically
631            
632             open MYFH, "<myfile.txt" or die "Can't open file myfile.txt at ";
633             $parser->read(\*MYFH); # Will not close MYFH
634             close MYFH;
635            
636             B<Note:> To extend the class to other text formats, override C<L<save_record|/save_record>>.
637            
638             =head2 filename
639            
640             Takes an optional string argument containing the name of a file. Returns the name of the file that was last opened if any. Returns C<undef> if no file has been opened.
641            
642             print "Last read ", $parser->filename, "\n";
643            
644             The value stored is "persistent" - meaning that the method remembers the last file that was C<L<read|/read>>.
645            
646             $parser->read(shift @ARGV);
647             print $parser->filename(), ":\n",
648             "=" x (length($parser->filename())+1),
649             "\n",
650             $parser->get_records(),
651             "\n";
652            
653             A C<read> call with a filehandle, will clear the last file name.
654            
655             $parser->read(\*MYFH);
656             print "Last file name is lost\n" if not defined $parser->filename();
657            
658             =head2 filehandle
659            
660             Takes an optional argument, that is a filehandle C<GLOB> (such as C<\*STDIN>) or an object of the C<FileHandle> class. Returns the filehandle last saved, or C<undef> if none was saved.
661            
662             my $fh = $parser->filehandle();
663            
664             Like C<L<filename|/filename>>, C<filehandle> is also "persistent". Its old value is lost when either C<filename> is set, or C<read> is called with a filename.
665            
666             $parser->read(\*STDIN);
667             my $lastfh = $parser->filehandle(); # Will return glob of STDIN
668            
669             =head2 lines_parsed
670            
671             Takes no arguments. Returns the number of lines last parsed. Every call to C<read> causes the value to be auto-reset.
672            
673             print $parser->lines_parsed, " lines were parsed\n";
674            
675             =head2 has_aborted
676            
677             Takes no arguments, returns a boolean to indicate if text reading was aborted in the middle.
678            
679             print "Aborted\n" if $parser->has_aborted();
680            
681             =head2 get_records
682            
683             Takes no arguments. Returns an array containing all the records saved by the parser.
684            
685             foreach my $record ( $parser->get_records ) {
686             $i++;
687             print "Record: $i: ", $record, "\n";
688             }
689            
690             =head2 pop_record
691            
692             Takes no arguments and pops the last saved record.
693            
694             my $last_rec = $parser->pop_record;
695             $uc_last = uc $last_rec;
696             $parser->save_record($uc_last);
697            
698             =head2 last_record
699            
700             Takes no arguments and returns the last saved record. Leaves the saved records untouched.
701            
702             my $last_rec = $parser->last_record;
703            
704             =head1 USE ONLY IN RULES AND SUBCLASS
705            
706             Do NOT override these methods. They are valid only within a subclass, inside the user-implementation of methods described under L<OVERRIDE IN SUBCLASS|/"OVERRIDE IN SUBCLASS">.
707            
708             =head2 this_line
709            
710             Takes no arguments, and returns the current line being parsed. For example:
711            
712             sub save_record {
713             # ...
714             do_something($self->this_line);
715             # ...
716             }
717            
718             =head2 abort_reading
719            
720             Takes no arguments. Returns C<1>. To be used only in the derived class to abort C<read> in the middle.
721            
722             sub save_record {
723             # ...
724             $self->abort_reading if some_condition($self->this_line);
725             # ...
726             }
727            
728             =head2 push_records
729            
730             This is useful if one needs to implement an C<include>-like command in some text format. The example below illustrates this.
731            
732             package OneParser;
733             use Moose;
734             extends 'Text::Parser';
735            
736             sub save_record {
737             # ...
738             # Under some condition:
739             my $parser = AnotherParser->new();
740             $parser->read($some_file);
741             $self->push_records($parser->get_records);
742             # ...
743             }
744            
745             =head2 Other methods available on C<auto_split>
746            
747             When the C<L<auto_split|/auto_split>> attribute is on (or if it is turned on later), the following additional methods become available:
748            
749             =over 4
750            
751             =item *
752            
753             L<NF|Text::Parser::AutoSplit/NF>
754            
755             =item *
756            
757             L<fields|Text::Parser::AutoSplit/fields>
758            
759             =item *
760            
761             L<field|Text::Parser::AutoSplit/field>
762            
763             =item *
764            
765             L<field_range|Text::Parser::AutoSplit/field_range>
766            
767             =item *
768            
769             L<join_range|Text::Parser::AutoSplit/join_range>
770            
771             =item *
772            
773             L<find_field|Text::Parser::AutoSplit/find_field>
774            
775             =item *
776            
777             L<find_field_index|Text::Parser::AutoSplit/find_field_index>
778            
779             =item *
780            
781             L<splice_fields|Text::Parser::AutoSplit/splice_fields>
782            
783             =back
784            
785             =head1 OVERRIDE IN SUBCLASS
786            
787             The following methods should never be called in the C<::main> program. They may be overridden (or re-defined) in a subclass.
788            
789             =head2 save_record
790            
791             This method may be re-defined in a subclass to parse the target text format. The default implementation takes a single argument and stores it as a record. If no arguments are passed, C<undef> is stored as a record. Note that unlike earlier versions of C<Text::Parser> it is not required to override this method in your derived class. You can simply use the rules instead.
792            
793             For a developer re-defining C<save_record>, in addition to C<L<this_line|/"this_line">>, several additional methods become available if the C<auto_split> attribute is set. These methods are described in greater detail in L<Text::Parser::AutoSplit>, and they are accessible only within C<save_record>.
794            
795             B<Note:> Developers may store records in any form - string, array reference, hash reference, complex data structure, or an object of some class. The program that reads these records using C<L<get_records|/get_records>> has to interpret them. So developers should document the records created by their own implementation of C<save_record>.
796            
797             =head2 PARSING LINE-WRAPPED FILES
798            
799             These methods are useful when parsing line-wrapped files, i.e., if the target text format allows wrapping the content of one line into multiple lines. In such cases, you should C<extend> the C<Text::Parser> class and override the following methods.
800            
801             =head3 is_line_continued
802            
803             If the target text format supports line-wrapping, the developer must override and implement this method. Your method should take a string argument and return a boolean indicating if the line is continued or not.
804            
805             There is a default implementation shipped with this class with return values as follows:
806            
807             multiline_type    | Return value
808             ------------------+---------------------------------
809             undef             | 0
810             join_last         | 0 for first line, 1 otherwise
811             join_next         | 1
812            
813             =head3 join_last_line
814            
815             Again, the developer should implement this method. This method should take two strings, join them while removing any continuation characters, and return the result. The default implementation just concatenates two strings and returns the result without removing anything (not even C<chomp>). See L<Text::Parser::Multiline> for more on this.
816            
817             =head1 EXAMPLES
818            
819             You can find example code in L<Text::Parser::Manual::ComparingWithNativePerl>.
820            
821             =head1 THINGS TO BE DONE
822            
823             This package is still a work in progress. Future versions are expected to include features to:
824            
825             =over 4
826            
827             =item *
828            
829             read and parse from a buffer
830            
831             =item *
832            
833             automatically uncompress input
834            
835             =item *
836            
837             I<suggestions welcome ...>
838            
839             =back
840            
841             Contributions and suggestions are welcome and properly acknowledged.
842            
843             =head1 SEE ALSO
844            
845             =over 4
846            
847             =item *
848            
849             L<Text::Parser::Manual> - Read this manual
850            
851             =item *
852            
853             L<The AWK Programming Language|https://books.google.com/books/about/The_AWK_Programming_Language.html?id=53ueQgAACAAJ> - by B<A>ho, B<W>einberger, and B<K>ernighan.
854            
855             =item *
856            
857             L<Text::Parser::Errors> - documentation of the exceptions this class throws
858            
859             =item *
860            
861             L<Text::Parser::Multiline> - how to read line-wrapped text input
862            
863             =back
864            
865             =head1 BUGS
866            
867             Please report any bugs or feature requests on the bugtracker website
868             L<http://github.com/balajirama/Text-Parser/issues>
869            
870             When submitting a bug or request, please include a test-file or a
871             patch to an existing test-file that illustrates the bug or desired
872             feature.
873            
874             =head1 AUTHOR
875            
876             Balaji Ramasubramanian <balajiram@cpan.org>
877            
878             =head1 COPYRIGHT AND LICENSE
879            
880             This software is copyright (c) 2018-2019 by Balaji Ramasubramanian.
881            
882             This is free software; you can redistribute it and/or modify it under
883             the same terms as the Perl 5 programming language system itself.
884            
885             =head1 CONTRIBUTORS
886            
887             =for stopwords H.Merijn Brand - Tux Mohammad S Anwar
888            
889             =over 4
890            
891             =item *
892            
893             H.Merijn Brand - Tux <h.m.brand@xs4all.nl>
894            
895             =item *
896            
897             Mohammad S Anwar <mohammad.anwar@yahoo.com>
898            
899             =back
900            
901             =cut
902