File Coverage

blib/lib/Text/Parser/AutoSplit.pm
Criterion Covered Total %
statement 49 49 100.0
branch 12 12 100.0
condition n/a
subroutine 16 16 100.0
pod 2 2 100.0
total 79 79 100.0


line stmt bran cond sub pod time code
1 22     22   16104 use strict;
  22         54  
  22         779  
2 22     22   134 use warnings;
  22         51  
  22         1057  
3              
4             package Text::Parser::AutoSplit 1.000;
5              
6             # ABSTRACT: A role that adds the ability to auto-split a line into fields
7              
8 22     22   147 use Moose::Role;
  22         51  
  22         397  
9 22     22   138798 use MooseX::CoverableModifiers;
  22         368  
  22         234  
10 22     22   3644 use String::Util qw(trim);
  22         156  
  22         1532  
11 22     22   359 use Text::Parser::Error;
  22         58  
  22         231  
12 22     22   26327 use English;
  22         24543  
  22         184  
13              
14              
15             has _fields => (
16                 is => 'ro',
17                 isa => 'ArrayRef[Str]',
18                 lazy => 1,
19                 init_arg => undef,
20                 default => sub { [] },
21                 traits => ['Array'],
22                 writer => '_set_fields',
23                 clearer => '_clear_all_fields',
24                 handles => {
25                     'NF' => 'count',
26                     'fields' => 'elements',
27                     'field' => 'get',
28                     'find_field' => 'first',
29                     'find_field_index' => 'first_index',
30                     'splice_fields' => 'splice',
31                 },
32             );
33              
34             requires '_set_this_line', 'FS', '_clear_this_line', 'this_line',
35                 'auto_split';
36              
37             after _set_this_line => sub {
38 528     528   3429     my $self = shift;
39 528 100       12776     return if not $self->auto_split;
40 482         11314     $self->_set_fields( [ split $self->FS, trim( $self->this_line ) ] );
41             };
42              
43             after _clear_this_line => sub {
44 44     44   395     my $self = shift;
45 44         1641     $self->_clear_all_fields;
46             };
47              
48              
49             sub field_range {
50 70     70 1 94113     my $self = shift;
51 70         232     my (@range) = $self->__validate_index_range(@_);
52 67         202     $self->_sub_field_range(@range);
53             }
54              
55             sub __validate_index_range {
56 70     70   133     my $self = shift;
57              
58 70         2318     $self->field($_) for (@_);
59 67         249     map { _pos_index( $_, $self->NF ) } __set_defaults(@_);
  134         4894  
60             }
61              
62             sub __set_defaults {
63 67     67   203     my ( $i, $j ) = @_;
64 67 100       214     $i = 0 if not defined $i;
65 67 100       515     $j = -1 if not defined $j;
66 67         182     return ( $i, $j );
67             }
68              
69             sub _pos_index {
70 134     134   301     my ( $ind, $nf ) = ( shift, shift );
71 134 100       531     ( $ind < 0 ) ? $ind + $nf : $ind;
72             }
73              
74             sub _sub_field_range {
75 67     67   149     my ( $self, $start, $end ) = ( shift, shift, shift );
76 67 100       338     my (@range)
77                     = ( $start <= $end ) ? ( $start .. $end ) : reverse( $end .. $start );
78 134         120     map { $self->field($_) } @range;
  1824         63455  
79             }
80              
81              
82             sub join_range {
83 17     17 1 82     my $self = shift;
84 17 100       64     my $sep = ( @_ < 3 ) ? $LIST_SEPARATOR : pop;
85 17         80     join $sep, $self->field_range(@_);
86             }
87              
88              
89 22     22   29503 no Moose::Role;
  22         55  
  22         196  
90              
91             1;
92              
93             __END__
94            
95             =pod
96            
97             =encoding UTF-8
98            
99             =head1 NAME
100            
101             Text::Parser::AutoSplit - A role that adds the ability to auto-split a line into fields
102            
103             =head1 VERSION
104            
105             version 1.000
106            
107             =head1 SYNOPSIS
108            
109             use Text::Parser;
110            
111             my $p1 = Text::Parser->new();
112             $p1->read('/path/to/file');
113             my $p2 = Text::Parser->new();
114             $p2->add_rule( do => '$this->field(0);' );
115             ## add_rule method automatically sets up auto_split
116             $p2->read('/another/file');
117            
118             =head1 DESCRIPTION
119            
120             C<Text::Parser::AutoSplit> is a role that is automatically composed into an object of L<Text::Parser> if the C<auto_split> attribute is set during object construction, or when C<L<add_rule|Text::Parser/"add_rule">> method is called. The field separator is controlled by another C<Text::Parser> attribute C<L<FS|Text::Parser/"FS">>.
121            
122             When the C<auto_split> attribute is set to a true value, the object of C<Text::Parser> will be able to use methods described in this role.
123            
124             =head1 METHODS AVAILABLE ON AUTO-SPLIT
125            
126             These methods become available when C<auto_split> attribute is true. A runtime error will be thrown if they are called without C<auto_split> being set. They can be used inside a subclass or in the rules.
127            
128             =head2 NF
129            
130             The name of this method comes from the C<NF> variable in the popular L<GNU Awk program|https://www.gnu.org/software/gawk/gawk.html>.
131            
132             Returns the number of fields on a line. The field separator is specified with C<FS> attribute.
133            
134             $parser->applies_rule(
135             if => '$this->NF >= 2'
136             do => '$this->collect_info($2);',
137             dont_record => 1,
138             );
139            
140             If your rule contains any positional identifiers (like C<$1>, C<$2>, C<$3> etc., to identify the field) the rule automatically checks that there are at least as many fields as the largest positional identifier. So the above rule could also be written as:
141            
142             $parser->applies_rule(
143             do => '$this->collect_info($2);',
144             dont_record => 1,
145             );
146            
147             It has the same results.
148            
149             =head2 fields
150            
151             Takes no argument and returns all the fields as an array. The C<FS> field separator controls how fields are defined. Leading and trailing spaces are trimmed.
152            
153             $parser->add_rule( do => 'return [ $this->fields ];' );
154            
155             =head2 field
156            
157             Takes an integer argument and returns the field whose index is passed as argument.
158            
159             $parser->add_rule(
160             if => '$this->field(0) eq "END"',
161             do => '$this->abort_reading;',
162             dont_record => 1,
163             );
164            
165             You can specify negative elements to start counting from the end. For example index C<-1> is the last element, C<-2> is the penultimate one, etc. Let's say the following is the text on a line in a file:
166            
167             THIS IS SOME TEXT
168             field(0) field(1) field(2) field(3)
169             field(-4) field(-3) field(-2) field(-1)
170            
171             =head2 field_range
172            
173             Takes two optional integers C<$i> and C<$j> as arguments and returns an array, where the first element is C<field($i)>, the second C<field($i+1)>, and so on, till C<field($j)>.
174            
175             $parser->add_rule(
176             if => '$1 eq "NAME:"',
177             do => 'return [ $this->field_range(1, -1) ];',
178             );
179            
180             Both C<$i> and C<$j> can be negative, as is allowed by the C<field()> method. So, for example:
181            
182             $parser->add_rule(
183             do => 'return [ $this->field_range(-2, -1) ];' # Saves the last two fields of every line
184             );
185            
186             If C<$j> argument is omitted or set to C<undef>, it will be treated as C<-1> and if C<$i> is omitted, it is treated as C<0>. For example the following may be used inside rules:
187            
188             $this->field_range(1); # Returns all elements omitting the first
189             $this->field_range(); # same as fields()
190             $this->field_range(undef, -2); # Returns all elements omitting the last
191            
192             =head2 join_range
193            
194             This method essentially joins the return value of the C<field_range> method. It takes three arguments. The last argument is the joining string, and the first two are optional integer arguments C<$i> and C<$j> just like C<field_range> method.
195            
196             $parser->add_rule(
197             do => qq(
198             $this->join_range(); # Joins all fields with $" (see perlvar)
199             $this->join_range(0, -1, '#'); # Joins with # separator
200             $this->join_range(2); # Joins all elements starting with index 2 to the end
201             # with $"
202             $this->join_range(1, -2); # Joins all elements in specified range with $"
203             ));
204             ## The return value of the last statement in the 'do' block is saved as a record
205            
206             =head2 find_field
207            
208             This method finds an element matching a given criterion. The match is done by a subroutine reference passed as argument to this method. The subroutine will be called against each field on the line, until one matches or all elements have been checked. Each field will be available in the subroutine as C<$_>. Its behavior is the same as the C<first> function of L<List::Util>.
209            
210             sub save_record {
211             my $self = shift;
212             my $param = $self->find_field(
213             sub { $_ =~ /[=]/ }
214             );
215             }
216            
217             =head2 find_field_index
218            
219             This is similar to the C<L<find_field|/find_field>> method above, except that it returns the index of the element instead of the element itself.
220            
221             sub save_record {
222             my $self = shift;
223             my $idx = $self->find_field_index(
224             sub { $_ =~ /[=]/ }
225             );
226             }
227            
228             =head2 splice_fields
229            
230             Just like Perl's built-in C<splice> function.
231            
232             ## Inside your own save_record method ...
233             my (@removed1) = $self->splice_fields($offset, $length, @values);
234             my (@removed2) = $self->splice_fields($offset, $length);
235             my (@removed3) = $self->splice_fields($offset);
236            
237             The offset above is a required argument and can be negative.
238            
239             B<WARNING:> This is a destructive function. It I<will> remove elements just like Perl's built-in C<splice> does, and the removed will be returned. If you only want to get the elements in a specific range of indices, try the C<L<field_range|/field_range>> method instead.
240            
241             =head1 SEE ALSO
242            
243             =over 4
244            
245             =item *
246            
247             L<List::Util>
248            
249             =item *
250            
251             L<List::SomeUtils>
252            
253             =item *
254            
255             L<GNU Awk program|https://www.gnu.org/software/gawk/gawk.html>
256            
257             =back
258            
259             =head1 BUGS
260            
261             Please report any bugs or feature requests on the bugtracker website
262             L<http://github.com/balajirama/Text-Parser/issues>
263            
264             When submitting a bug or request, please include a test-file or a
265             patch to an existing test-file that illustrates the bug or desired
266             feature.
267            
268             =head1 AUTHOR
269            
270             Balaji Ramasubramanian <balajiram@cpan.org>
271            
272             =head1 COPYRIGHT AND LICENSE
273            
274             This software is copyright (c) 2018-2019 by Balaji Ramasubramanian.
275            
276             This is free software; you can redistribute it and/or modify it under
277             the same terms as the Perl 5 programming language system itself.
278            
279             =cut
280