File Coverage

blib/lib/Text/RecordParser.pm

Criterion	Covered	Total	%
statement	276	279	98.9
branch	168	180	93.3
condition	29	37	78.3
subroutine	31	31	100.0
pod	23	23	100.0
total	527	550	95.8

line	stmt	bran	cond	sub	pod	time	code
1							package Text::RecordParser;
2
3							=head1 NAME
4
5							Text::RecordParser - read record-oriented files
6
7							=head1 SYNOPSIS
8
9							use Text::RecordParser;
10
11							# use default record (\n) and field (,) separators
12							my $p = Text::RecordParser->new( $file );
13
14							# or be explicit
15							my $p = Text::RecordParser->new({
16							filename => $file,
17							field_separator => "\t",
18							});
19
20							$p->filename('foo.csv');
21
22							# Split records on two newlines
23							$p->record_separator("\n\n");
24
25							# Split fields on tabs
26							$p->field_separator("\t");
27
28							# Skip lines beginning with hashes
29							$p->comment( qr/^#/ );
30
31							# Trim whitespace
32							$p->trim(1);
33
34							# Use the fields in the first line as column names
35							$p->bind_header;
36
37							# Get a list of the header fields (in order)
38							my @columns = $p->field_list;
39
40							# Extract a particular field from the next row
41							my ( $name, $age ) = $p->extract( qw[name age] );
42
43							# Return all the fields from the next row
44							my @fields = $p->fetchrow_array;
45
46							# Define a field alias
47							$p->set_field_alias( name => 'handle' );
48
49							# Return all the fields from the next row as a hashref
50							my $record = $p->fetchrow_hashref;
51							print $record->{'name'};
52							# or
53							print $record->{'handle'};
54
55							# Return the record as an object with fields as accessors
56							my $object = $p->fetchrow_object;
57							print $object->name; # or $object->handle;
58
59							# Get all data as arrayref of arrayrefs
60							my $data = $p->fetchall_arrayref;
61
62							# Get all data as arrayref of hashrefs
63							my $data = $p->fetchall_arrayref( { Columns => {} } );
64
65							# Get all data as hashref of hashrefs
66							my $data = $p->fetchall_hashref('name');
67
68							=head1 DESCRIPTION
69
70							This module is for reading record-oriented data in a delimited text
71							file. The most common example have records separated by newlines and
72							fields separated by commas or tabs, but this module aims to provide a
73							consistent interface for handling sequential records in a file however
74							they may be delimited. Typically this data lists the fields in the
75							first line of the file, in which case you should call C
76							to bind the field name (or not, and it will be called implicitly). If
77							the first line contains data, you can still bind your own field names
78							via C. Either way, you can then use many methods to get
79							at the data as arrays or hashes.
80
81							=head1 METHODS
82
83							=cut
84
85	12			12		237322	use strict;
	12					18
	12					362
86	12			12		41	use warnings;
	12					15
	12					255
87	12			12		4621	use version;
	12					15701
	12					523
88	12			12		700	use Carp qw( croak );
	12					16
	12					623
89	12			12		5435	use IO::Scalar;
	12					107014
	12					467
90	12			12		5394	use List::MoreUtils qw( uniq );
	12					93139
	12					95
91	12			12		7076	use Readonly;
	12					5786
	12					663
92	12			12		5913	use Text::ParseWords qw( parse_line );
	12					10892
	12					29532
93
94							our $VERSION = version->new('1.6.5');
95
96							Readonly my $COMMA => q{,};
97							Readonly my $EMPTY_STR => q{};
98							Readonly my $NEW_LINE => qq{\n};
99							Readonly my $PIPE => q{\|};
100
101							# ----------------------------------------------------------------
102							sub new {
103
104							=pod
105
106							=head2 new
107
108							This is the object constructor. It takes a hash (or hashref) of
109							arguments. Each argument can also be set through the method of the
110							same name.
111
112							=over 4
113
114							=item * filename
115
116							The path to the file being read. If the filename is passed and the fh
117							is not, then it will open a filehandle on that file and sets C
118							accordingly.
119
120							=item * comment
121
122							A compiled regular expression identifying comment lines that should
123							be skipped.
124
125							=item * data
126
127							The data to read.
128
129							=item * fh
130
131							The filehandle of the file to read.
132
133							=item * field_separator \| fs
134
135							The field separator (default is comma).
136
137							=item * record_separator \| rs
138
139							The record separator (default is newline).
140
141							=item * field_filter
142
143							A callback applied to all the fields as they are read.
144
145							=item * header_filter
146
147							A callback applied to the column names.
148
149							=item * trim
150
151							Boolean to enable trimming of leading and trailing whitespace from fields
152							(useful if splitting on whitespace only).
153
154							=back
155
156							See methods for each argument name for more information.
157
158							Alternately, if you supply a single argument to C, it will be
159							treated as the C argument.
160
161							=cut
162
163	58			58	1	28330	my $class = shift;
164
165	58	100	100			503	my $args
		100
166							= defined $_[0] && UNIVERSAL::isa( $_[0], 'HASH' ) ? shift
167							: scalar @_ == 1 ? { filename => shift }
168							: { @_ };
169
170	58					120	my $self = bless {}, $class;
171
172	58	100				137	if ( my $fs = $args->{'fs'} ) {
173	1					4	$args->{'field_separator'} = $fs;
174	1					3	delete $args->{'fs'};
175							}
176
177	58	100				140	if ( my $rs = $args->{'rs'} ) {
178	1					3	$args->{'record_separator'} = $rs;
179	1					2	delete $args->{'rs'};
180							}
181
182	58					57	my $data_handles = 0;
183	58					118	for my $arg (
184							qw[ filename fh header_filter field_filter trim
185							field_separator record_separator data comment
186							]
187							) {
188	522	100				827	next if !defined $args->{ $arg };
189
190	56	100				244	if ( $arg =~ / \A (filename\|fh\|data) \Z /xms ) {
191	37					39	$data_handles++;
192							}
193
194	56					165	$self->$arg( $args->{ $arg } );
195							}
196
197	58	100				132	if ( $data_handles > 1 ) {
198	1					31	croak
199							'Passed too many arguments to read the data. '.
200							'Please choose only one of "filename," "fh," or "data."'
201							;
202							}
203
204	57					166	return $self;
205							}
206
207							# ----------------------------------------------------------------
208							sub bind_fields {
209
210							=pod
211
212							=head2 bind_fields
213
214							$p->bind_fields( qw[ name rank serial_number ] );
215
216							Takes an array of field names and memorizes the field positions for
217							later use. If the input file has no header line but you still wish to
218							retrieve the fields by name (or even if you want to call
219							C and then give your own field names), simply pass in the
220							an array of field names you wish to use.
221
222							Pass in an empty array reference to unset:
223
224							$p->bind_field( [] ); # unsets fields
225
226							=cut
227
228	42			42	1	2888	my $self = shift;
229
230							# using an empty arrayref to unset
231	42	100	66			174	if ( ref $_[0] eq 'ARRAY' && !@{ $_[0] } ) {
	1	100				4
232	1					3	$self->{'field_pos_ordered'} = [];
233	1					4	$self->{'field_pos'} = {};
234	1					3	$self->{'fields_bound'} = 0;
235							}
236							elsif ( @_ ) {
237	40					77	my @fields = @_;
238	40					115	$self->{'field_pos_ordered'} = [ @fields ];
239
240	40					42	my %field_pos;
241	40					84	for my $i ( 0 .. $#fields ) {
242	183	50				254	next unless $fields[ $i ];
243	183					292	$field_pos{ $fields[ $i ] } = $i;
244							}
245
246	40					82	$self->{'field_pos'} = \%field_pos;
247	40					85	$self->{'fields_bound'} = 1;
248							}
249							else {
250	1					8	croak 'Bind fields called without field list';
251							}
252
253	41					54	return 1;
254							}
255
256							# ----------------------------------------------------------------
257							sub bind_header {
258
259							=pod
260
261							=head2 bind_header
262
263							$p->bind_header;
264							my $name = $p->extract('name');
265
266							Takes the fields from the next row under the cursor and assigns the field
267							names to the values. Usually you would call this immediately after
268							opening the file in order to bind the field names in the first row.
269
270							=cut
271
272	37			37	1	174	my $self = shift;
273
274	37	100				80	if ( my @columns = $self->fetchrow_array ) {
275	34	100				67	if ( my $filter = $self->header_filter ) {
276	2					6	for my $i ( 0 .. $#columns ) {
277	12					33	$columns[ $i ] = $filter->( $columns[ $i ] );
278							}
279							}
280
281	34					249	$self->bind_fields( @columns );
282							}
283							else {
284	2					7	croak q[Can't find columns in file '], $self->filename, q['];
285							}
286
287	34					62	return 1;
288							}
289
290							# ----------------------------------------------------------------
291							sub comment {
292
293							=pod
294
295							=head2 comment
296
297							$p->comment( qr/^#/ ); # Perl-style comments
298							$p->comment( qr/^--/ ); # SQL-style comments
299
300							Takes a regex to apply to a record to see if it looks like a comment
301							to skip.
302
303							=cut
304
305	93			93	1	119	my $self = shift;
306
307	93	100				138	if ( my $arg = shift ) {
308	3	100				7	if ( ref $arg ne 'Regexp' ) {
309	1					20	croak q[Argument to comment doesn't look like a regex];
310							}
311
312	2					19	$self->{'comment'} = $arg;
313							}
314
315	92	100				301	return defined $self->{'comment'} ? $self->{'comment'} : $EMPTY_STR;
316							}
317
318							# ----------------------------------------------------------------
319							sub data {
320
321							=pod
322
323							=head2 data
324
325							$p->data( $string );
326							$p->data( \$string );
327							$p->data( @lines );
328							$p->data( [$line1, $line2, $line3] );
329							$p->data( IO::File->new('
330
331							Allows a scalar, scalar reference, glob, array, or array reference as
332							the thing to read instead of a file handle.
333
334							It's not advised to pass a filehandle to C as it will read the
335							entire contents of the file rather than one line at a time if you set
336							it via C.
337
338							=cut
339
340	10			10	1	1430	my $self = shift;
341	10					10	my $data;
342
343	10	100				29	if (@_) {
344	9					11	my $arg = shift;
345
346	9	100	66			88	if ( UNIVERSAL::isa( $arg, 'SCALAR' ) ) {
		100
		100
		100
347	1					2	$data = $$arg;
348							}
349							elsif ( UNIVERSAL::isa( $arg, 'ARRAY' ) ) {
350	1					3	$data = join $EMPTY_STR, @$arg;
351							}
352							elsif ( UNIVERSAL::isa( $arg, 'GLOB' ) ) {
353	1					4	local $/;
354	1					25	$data = <$arg>;
355							}
356							elsif ( !ref($arg) && @_ ) {
357	2					6	$data = join $EMPTY_STR, $arg, @_;
358							}
359							else {
360	4					7	$data = $arg;
361							}
362							}
363							else {
364	1					9	croak 'Data called without any arguments';
365							}
366
367	9	100				23	if ( $data ) {
368	8					45	my $fh = IO::Scalar->new( \$data );
369	8					424	$self->fh( $fh );
370							}
371							else {
372	1					10	croak 'No usable data';
373							}
374
375	8					23	return 1;
376							}
377
378							# ----------------------------------------------------------------
379							sub extract {
380
381							=pod
382
383							=head2 extract
384
385							my ( $foo, $bar, $baz ) = $p->extract( qw[ foo bar baz ] );
386
387							Extracts a list of fields out of the last row read. The field names
388							must correspond to the field names bound either via C or
389							C.
390
391							=cut
392
393	6			6	1	593	my $self = shift;
394	6	100				14	my @fields = @_ or return;
395	5					9	my %allowed = map { $_, 1 } $self->field_list;
	21					29
396	4	100				10	my $record = $self->fetchrow_hashref or return;
397
398	3					3	my @data;
399	3					4	foreach my $field ( @fields ) {
400	4	100				7	if ( $allowed{ $field } ) {
401	3					4	push @data, $record->{ $field };
402							}
403							else {
404	1					5	croak "Invalid field $field for file "
405							. $self->filename
406							. $NEW_LINE
407							. 'Valid fields are: '
408							. join(', ', $self->field_list)
409							. $NEW_LINE
410							;
411							}
412							}
413
414	2	100				8	return scalar @data == 1 ? $data[0] : @data;
415							}
416
417							# ----------------------------------------------------------------
418							sub fetchrow_array {
419
420							=pod
421
422							=head2 fetchrow_array
423
424							my @values = $p->fetchrow_array;
425
426							Reads a row from the file and returns an array or array reference
427							of the fields.
428
429							=cut
430
431	91			91	1	2698	my $self = shift;
432	91	100				119	my $fh = $self->fh or croak 'No filename or file handle';
433	90					187	my $comment = $self->comment;
434	90					338	local $/ = $self->record_separator;
435
436	90					602	my $line;
437	90					81	my $line_no = 0;
438	90					71	for ( ;; ) {
439	101					79	$line_no++;
440	101	100				575	defined( $line = <$fh> ) or return;
441	91					492	chomp $line;
442	91	100	100			217	next if $comment and $line =~ $comment;
443	87					170	$line =~ s/(?
444	87	100				144	last if $line;
445							}
446
447	80					140	my $separator = $self->field_separator;
448	80	100				221	$separator eq $PIPE and $separator = '\\|';
449	80	50				479	my @fields = map { defined $_ && $_ =~ s/\\'/'/g; $_ } (
	391	100				9043
	391					583
450							( ref $separator eq 'Regexp' )
451							? parse_line( $separator, 0, $line )
452							: parse_line( $separator, 1, $line )
453							);
454
455	80	50				164	if ( !@fields ) {
456	0					0	croak "Error reading line number $line_no:\n$line";
457							}
458
459	80	100				166	if ( my $filter = $self->field_filter ) {
460	4					5	@fields = map { $filter->( $_ ) } @fields;
	24					73
461							}
462
463	80	100				552	if ( $self->trim ) {
464	7	50				8	@fields = map { defined $_ && s/^\s+\|\s+$//g; $_ } @fields;
	29					99
	29					46
465							}
466
467	80					79	while ( my ( $position, $callback ) = each %{ $self->field_compute } ) {
	87					163
468	7	100				28	next if $position !~ m/^\d+$/;
469	3					13	$fields[ $position ] = $callback->( $fields[ $position ], \@fields );
470							}
471
472	80	100				519	return wantarray ? @fields : \@fields;
473							}
474
475							# ----------------------------------------------------------------
476							sub fetchrow_hashref {
477
478							=pod
479
480							=head2 fetchrow_hashref
481
482							my $record = $p->fetchrow_hashref;
483							print "Name = ", $record->{'name'}, "\n";
484
485							Reads a line of the file and returns it as a hash reference. The keys
486							of the hashref are the field names bound via C or
487							C. If you do not bind fields prior to calling this method,
488							the C method will be implicitly called for you.
489
490							=cut
491
492	38			38	1	4991	my $self = shift;
493	38	50				71	my @fields = $self->field_list or return;
494	38	100				67	my @row = $self->fetchrow_array or return;
495
496	33					45	my $i = 0;
497	33					29	my %return;
498	33					47	for my $field ( @fields ) {
499	156	50				222	next unless defined $row[ $i ];
500	156					219	$return{ $field } = $row[ $i++ ];
501	156	100				200	if ( my @aliases = $self->get_field_aliases( $field ) ) {
502	2					5	$return{ $_ } = $return{ $field } for @aliases;
503							}
504							}
505
506	33					41	while ( my ( $position, $callback ) = each %{ $self->field_compute } ) {
	37					98
507	4					14	$return{ $position } = $callback->( $return{ $position }, \%return );
508							}
509
510	33					154	return \%return;
511							}
512
513							# ----------------------------------------------------------------
514							sub fetchrow_object {
515
516							=pod
517
518							=head2 fetchrow_object
519
520							while ( my $object = $p->fetchrow_object ) {
521							my $id = $object->id;
522							my $name = $object->naem; # <-- this will throw a runtime error
523							}
524
525							This will return the next data record as a Text::RecordParser::Object
526							object that has read-only accessor methods of the field names and any
527							aliases. This allows you to enforce field names, further helping
528							ensure that your code is reading the input file correctly. That is,
529							if you are using the "fetchrow_hashref" method to read each line, you
530							may misspell the hash key and introduce a bug in your code. With this
531							method, Perl will throw an error if you attempt to read a field not
532							defined in the file's headers. Additionally, any defined field
533							aliases will be created as additional accessor methods.
534
535							=cut
536
537	4			4	1	999	my $self = shift;
538	4	100				9	my $row = $self->fetchrow_hashref or return;
539	3	50				7	my @fields = $self->field_list or return;
540
541	3					5	push @fields, map { $self->get_field_aliases( $_ ) } @fields;
	10					11
542
543	3					18	return Text::RecordParser::Object->new( \@fields, $row );
544							}
545
546							# ----------------------------------------------------------------
547							sub fetchall_arrayref {
548
549							=pod
550
551							=head2 fetchall_arrayref
552
553							my $records = $p->fetchall_arrayref;
554							for my $record ( @$records ) {
555							print "Name = ", $record->[0], "\n";
556							}
557
558							my $records = $p->fetchall_arrayref( { Columns => {} } );
559							for my $record ( @$records ) {
560							print "Name = ", $record->{'name'}, "\n";
561							}
562
563							Like DBI's fetchall_arrayref, returns an arrayref of arrayrefs. Also
564							accepts optional "{ Columns => {} }" argument to return an arrayref of
565							hashrefs.
566
567							=cut
568
569	4			4	1	15	my $self = shift;
570	1					2	my %args
571	4	100	100			26	= defined $_[0] && ref $_[0] eq 'HASH' ? %{ shift() }
		100
572							: @_ % 2 == 0 ? @_
573							: ();
574
575	4	100				13	my $method = ref $args{'Columns'} eq 'HASH'
576							? 'fetchrow_hashref' : 'fetchrow_array';
577
578	4					3	my @return;
579	4					10	while ( my $record = $self->$method() ) {
580	9					20	push @return, $record;
581							}
582
583	4					10	return \@return;
584							}
585
586							# ----------------------------------------------------------------
587							sub fetchall_hashref {
588
589							=pod
590
591							=head2 fetchall_hashref
592
593							my $records = $p->fetchall_hashref('id');
594							for my $id ( keys %$records ) {
595							my $record = $records->{ $id };
596							print "Name = ", $record->{'name'}, "\n";
597							}
598
599							Like DBI's fetchall_hashref, this returns a hash reference of hash
600							references. The keys of the top-level hashref are the field values
601							of the field argument you supply. The field name you supply can be
602							a field created by a C.
603
604							=cut
605
606	3			3	1	56	my $self = shift;
607	3		50			8	my $key_field = shift(@_) \|\| return croak('No key field');
608	3					7	my @fields = $self->field_list;
609
610	3					5	my ( %return, $field_ok );
611	3					7	while ( my $record = $self->fetchrow_hashref ) {
612	5	100				12	if ( !$field_ok ) {
613	3	100				8	if ( !exists $record->{ $key_field } ) {
614	1					23	croak "Invalid key field: '$key_field'";
615							}
616
617	2					4	$field_ok = 1;
618							}
619
620	4					15	$return{ $record->{ $key_field } } = $record;
621							}
622
623	2					8	return \%return;
624							}
625
626							# ----------------------------------------------------------------
627							sub fh {
628
629							=pod
630
631							=head2 fh
632
633							open my $fh, '<', $file or die $!;
634							$p->fh( $fh );
635
636							Gets or sets the filehandle of the file being read.
637
638							=cut
639
640	148			148	1	1860	my ( $self, $arg ) = @_;
641
642	148	100				280	if ( defined $arg ) {
643	13	100				38	if ( ! UNIVERSAL::isa( $arg, 'GLOB' ) ) {
644	1					10	croak q[Argument to fh doesn't look like a filehandle];
645							}
646
647	12	100				28	if ( defined $self->{'fh'} ) {
648	3	100				25	close $self->{'fh'} or croak "Can't close existing filehandle: $!";
649							}
650
651	11					29	$self->{'fh'} = $arg;
652	11					32	$self->{'filename'} = $EMPTY_STR;
653							}
654
655	146	100				411	if ( !defined $self->{'fh'} ) {
656	73	100				137	if ( my $file = $self->filename ) {
657	33	100				963	open my $fh, '<', $file or croak "Cannot read '$file': $!";
658	32					85	$self->{'fh'} = $fh;
659							}
660							}
661
662	145					653	return $self->{'fh'};
663							}
664
665							# ----------------------------------------------------------------
666							sub field_compute {
667
668							=pod
669
670							=head2 field_compute
671
672							A callback applied to the fields identified by position (or field
673							name if C or C was called).
674
675							The callback will be passed two arguments:
676
677							=over 4
678
679							=item 1
680
681							The current field
682
683							=item 2
684
685							A reference to all the other fields, either as an array or hash
686							reference, depending on the method which you called.
687
688							=back
689
690							If data looks like this:
691
692							parent children
693							Mike Greg,Peter,Bobby
694							Carol Marcia,Jane,Cindy
695
696							You could split the "children" field into an array reference with the
697							values like so:
698
699							$p->field_compute( 'children', sub { [ split /,/, shift() ] } );
700
701							The field position or name doesn't actually have to exist, which means
702							you could create new, computed fields on-the-fly. E.g., if you data
703							looks like this:
704
705							1,3,5
706							32,4,1
707							9,5,4
708
709							You could write a field_compute like this:
710
711							$p->field_compute( 3,
712							sub {
713							my ( $cur, $others ) = @_;
714							my $sum;
715							$sum += $_ for @$others;
716							return $sum;
717							}
718							);
719
720							Field "3" will be created as the sum of the other fields. This allows
721							you to further write:
722
723							my $data = $p->fetchall_arrayref;
724							for my $rec ( @$data ) {
725							print "$rec->[0] + $rec->[1] + $rec->[2] = $rec->[3]\n";
726							}
727
728							Prints:
729
730							1 + 3 + 5 = 9
731							32 + 4 + 1 = 37
732							9 + 5 + 4 = 18
733
734							=cut
735
736	130			130	1	707	my $self = shift;
737
738	130	100				202	if ( @_ ) {
739	6					8	my ( $position, $callback ) = @_;
740
741	6	100				23	if ( $position !~ /\S+/ ) {
742	1					19	croak 'No usable field name or position';
743							}
744
745	5	100				10	if ( ref $callback ne 'CODE' ) {
746	1					15	croak 'Callback not code reference';
747							}
748
749	4					11	$self->{'field_computes'}{ $position } = $callback;
750							}
751
752	128		100			611	return $self->{'field_computes'} \|\| {};
753							}
754
755							# ----------------------------------------------------------------
756							sub field_filter {
757
758							=pod
759
760							=head2 field_filter
761
762							$p->field_filter( sub { $_ = shift; uc(lc($_)) } );
763
764							A callback which is applied to each field. The callback will be
765							passed the current value of the field. Whatever is passed back will
766							become the new value of the field. The above example capitalizes
767							field values. To unset the filter, pass in the empty string.
768
769							=cut
770
771	89			89	1	1332	my ( $self, $filter ) = @_;
772
773	89	100				146	if ( defined $filter ) {
774	6	100				16	if ( $filter eq $EMPTY_STR ) {
		100
775	1					5	$self->{'field_filter'} = $EMPTY_STR; # allows nullification
776							}
777							elsif ( ref $filter eq 'CODE' ) {
778	4					27	$self->{'field_filter'} = $filter;
779							}
780							else {
781	1					12	croak q[Argument to field_filter doesn't look like code];
782							}
783							}
784
785	88		66			337	return $self->{'field_filter'} \|\| $EMPTY_STR;
786							}
787
788							# ----------------------------------------------------------------
789							sub field_list {
790
791							=pod
792
793							=head2 field_list
794
795							$p->bind_fields( qw[ foo bar baz ] );
796							my @fields = $p->field_list;
797							print join ', ', @fields; # prints "foo, bar, baz"
798
799							Returns the fields bound via C (or C).
800
801							=cut
802
803	65			65	1	851	my $self = shift;
804
805	65	100				138	if ( !$self->{'fields_bound'} ) {
806	12					30	$self->bind_header;
807							}
808
809	63	50				141	if ( ref $self->{'field_pos_ordered'} eq 'ARRAY' ) {
810	63					52	return @{ $self->{'field_pos_ordered'} };
	63					249
811							}
812							else {
813	0					0	croak 'No fields. Call "bind_fields" or "bind_header" first.';
814							}
815							}
816
817							# ----------------------------------------------------------------
818							sub field_positions {
819
820							=pod
821
822							=head2 field_positions
823
824							my %positions = $p->field_positions;
825
826							Returns a hash of the fields and their positions bound via
827							C (or C). Mostly for internal use.
828
829							=cut
830
831	6			6	1	26	my $self = shift;
832
833	6	100				19	if ( ref $self->{'field_pos'} eq 'HASH' ) {
834	3					3	return %{ $self->{'field_pos'} };
	3					15
835							}
836							else {
837	3					12	return;
838							}
839							}
840
841							# ----------------------------------------------------------------
842							sub field_separator {
843
844							=pod
845
846							=head2 field_separator
847
848							$p->field_separator("\t"); # splits fields on tabs
849							$p->field_separator('::'); # splits fields on double colons
850							$p->field_separator(qr/\s+/); # splits fields on whitespace
851							my $sep = $p->field_separator; # returns the current separator
852
853							Gets and sets the token to use as the field delimiter. Regular
854							expressions can be specified using qr//. If not specified, it will
855							take a guess based on the filename extension ("comma" for ".txt,"
856							".dat," or ".csv"; "tab" for ".tab"). The default is a comma.
857
858							=cut
859
860	104			104	1	2108	my $self = shift;
861
862	104	100				195	if ( @_ ) {
863	18					36	$self->{'field_separator'} = shift;
864							}
865
866	104	100				189	if ( !$self->{'field_separator'} ) {
867	36					36	my $guess;
868	36	100				70	if ( my $filename = $self->filename ) {
869	22	100				117	if ( $filename =~ /\.(csv\|txt\|dat)$/ ) {
		50
870	21					25	$guess = q{,};
871							}
872							elsif ( $filename =~ /\.tab$/ ) {
873	0					0	$guess = qq{\t};
874							}
875							}
876
877	36	100				155	if ( $guess ) {
878	21					40	$self->{'field_separator'} = $guess;
879							}
880							}
881
882	104		66			295	return $self->{'field_separator'} \|\| $COMMA;
883							}
884
885							# ----------------------------------------------------------------
886							sub filename {
887
888							=pod
889
890							=head2 filename
891
892							$p->filename('/path/to/file.dat');
893
894							Gets or sets the complete path to the file to be read. If a file is
895							already opened, then the handle on it will be closed and a new one
896							opened on the new file.
897
898							=cut
899
900	160			160	1	2614	my $self = shift;
901
902	160	100				259	if ( my $filename = shift ) {
903	42	100	66			1045	if ( -d $filename ) {
		100
904	1					28	croak "Cannot use directory '$filename' as input source";
905							}
906							elsif ( -f _ and -r _ ) {
907	40	100				132	if ( my $fh = $self->fh ) {
908	2	100				15	if ( !close($fh) ) {
909	1					13	croak "Can't close previously opened filehandle: $!\n";
910							}
911
912	1					3	$self->{'fh'} = undef;
913	1					4	$self->bind_fields([]);
914							}
915
916	39					104	$self->{'filename'} = $filename;
917							}
918							else {
919	1					21	croak
920							"Cannot use '$filename' as input source: ",
921							'file does not exist or is not readable.'
922							;
923							}
924							}
925
926	157		100			581	return $self->{'filename'} \|\| $EMPTY_STR;
927							}
928
929							# ----------------------------------------------------------------
930							sub get_field_aliases {
931
932							=pod
933
934							=head2 get_field_aliases
935
936							my @aliases = $p->get_field_aliases('name');
937
938							Allows you to define alternate names for fields, e.g., sometimes your
939							input file calls city "town" or "township," sometimes a file uses "Moniker"
940							instead of "name."
941
942							=cut
943
944	167			167	1	126	my $self = shift;
945	167	50				232	my $field_name = shift or return;
946
947	167	100				239	if ( !$self->{'field_alias'} ) {
948	160					343	return;
949							}
950
951	7	100				3	return @{ $self->{'field_alias'}{ $field_name } \|\| [] };
	7					42
952							}
953
954							# ----------------------------------------------------------------
955							sub header_filter {
956
957							=pod
958
959							=head2 header_filter
960
961							$p->header_filter( sub { $_ = shift; s/\s+/_/g; lc $_ } );
962
963							A callback applied to column header names. The callback will be
964							passed the current value of the header. Whatever is returned will
965							become the new value of the header. The above example collapses
966							spaces into a single underscore and lowercases the letters. To unset
967							a filter, pass in the empty string.
968
969							=cut
970
971	43			43	1	1923	my ( $self, $filter ) = @_;
972
973	43	100				90	if ( defined $filter ) {
974	6	100				18	if ( $filter eq $EMPTY_STR ) {
		100
975	1					4	$self->{'header_filter'} = $EMPTY_STR; # allows nullification
976							}
977							elsif ( ref $filter eq 'CODE' ) {
978	4					33	$self->{'header_filter'} = $filter;
979
980	4	100				13	if ( my %field_pos = $self->field_positions ) {
981	2					1	my @new_order;
982	2					6	while ( my ( $field, $order ) = each %field_pos ) {
983	6					7	my $xform = $filter->( $field );
984	6					20	$new_order[ $order ] = $xform;
985							}
986
987	2					4	$self->bind_fields( @new_order );
988							}
989							}
990							else{
991	1					30	croak q[Argument to field_filter doesn't look like code];
992							}
993							}
994
995	42		66			178	return $self->{'header_filter'} \|\| $EMPTY_STR;
996							}
997
998							# ----------------------------------------------------------------
999							sub record_separator {
1000
1001							=pod
1002
1003							=head2 record_separator
1004
1005							$p->record_separator("\n//\n");
1006							$p->field_separator("\n");
1007
1008							Gets and sets the token to use as the record separator. The default is
1009							a newline ("\n").
1010
1011							The above example would read a file that looks like this:
1012
1013							field1
1014							field2
1015							field3
1016							//
1017							data1
1018							data2
1019							data3
1020							//
1021
1022							=cut
1023
1024	101			101	1	570	my $self = shift;
1025
1026	101	100				203	if ( @_ ) {
1027	7					15	$self->{'record_separator'} = shift;
1028							}
1029
1030	101		66			345	return $self->{'record_separator'} \|\| $NEW_LINE;
1031							}
1032
1033							# ----------------------------------------------------------------
1034							sub set_field_alias {
1035
1036							=pod
1037
1038							=head2 set_field_alias
1039
1040							$p->set_field_alias({
1041							name => 'Moniker,handle', # comma-separated string
1042							city => [ qw( town township ) ], # or anonymous arrayref
1043							});
1044
1045							Allows you to define alternate names for fields, e.g., sometimes your
1046							input file calls city "town" or "township," sometimes a file uses "Moniker"
1047							instead of "name."
1048
1049							=cut
1050
1051	1			1	1	24	my $self = shift;
1052	1	50				4	my %args = ref $_[0] eq 'HASH' ? %{ $_[0] } : @_;
	1					3
1053	1					5	my %is_field = map { $_, 1 } $self->field_list;
	6					7
1054
1055							ARG:
1056	1					4	while ( my ( $field_name, $aliases ) = each %args ) {
1057	2	100				4	if ( ref $aliases ne 'ARRAY' ) {
1058	1					2	$aliases = [ split(/,/, $aliases) ];
1059							}
1060
1061	2	100				4	if ( !$is_field{ $field_name } ) {
1062	1					2	push @$aliases, $field_name;
1063	1					1	( $field_name ) = grep { $is_field{ $_ } } @$aliases;
	3					4
1064	1	50				3	next ARG unless $field_name;
1065							}
1066
1067	4					11	$self->{'field_alias'}{ $field_name } = [
1068	2					8	grep { $_ ne $field_name } uniq( @$aliases )
1069							];
1070							}
1071
1072	1					3	return 1;
1073							}
1074
1075							# ----------------------------------------------------------------
1076							sub trim {
1077
1078							=pod
1079
1080							=head2 trim
1081
1082							my $trim_value = $p->trim(1);
1083
1084							Provide "true" argument to remove leading and trailing whitespace from
1085							fields. Use a "false" argument to disable.
1086
1087							=cut
1088
1089	89			89	1	572	my ( $self, $arg ) = @_;
1090
1091	89	100				133	if ( defined $arg ) {
1092	6	100				18	$self->{'trim'} = $arg ? 1 : 0;
1093							}
1094
1095	89					181	return $self->{'trim'};
1096							}
1097
1098							1;
1099
1100							# ----------------------------------------------------------------
1101							# I must Create a System, or be enslav'd by another Man's;
1102							# I will not Reason and Compare; my business is to Create.
1103							# -- William Blake, "Jerusalem"
1104							# ----------------------------------------------------------------
1105
1106							=pod
1107
1108							=head1 AUTHOR
1109
1110							Ken Youens-Clark Ekclark@cpan.orgE
1111
1112							=head1 SOURCE
1113
1114							http://github.com/kyclark/text-recordparser
1115
1116							=head1 CREDITS
1117
1118							Thanks to the following:
1119
1120							=over 4
1121
1122							=item * Benjamin Tilly
1123
1124							For Text::xSV, the inspirado for this module
1125
1126							=item * Tim Bunce et al.
1127
1128							For DBI, from which many of the methods were shamelessly stolen
1129
1130							=item * Tom Aldcroft
1131
1132							For contributing code to make it easy to parse whitespace-delimited data
1133
1134							=item * Liya Ren
1135
1136							For catching the column-ordering error when parsing with "no-headers"
1137
1138							=item * Sharon Wei
1139
1140							For catching bug in C that sets up infinite loops
1141
1142							=item * Lars Thegler
1143
1144							For bug report on missing "script_files" arg in Build.PL
1145
1146							=back
1147
1148							=head1 BUGS
1149
1150							None known. Please use http://rt.cpan.org/ for reporting bugs.
1151
1152							=head1 LICENSE AND COPYRIGHT
1153
1154							Copyright (C) 2006-10 Ken Youens-Clark. All rights reserved.
1155
1156							This program is free software; you can redistribute it and/or modify
1157							it under the terms of the GNU General Public License as published by
1158							the Free Software Foundation; version 2.
1159
1160							This program is distributed in the hope that it will be useful, but
1161							WITHOUT ANY WARRANTY; without even the implied warranty of
1162							MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1163							General Public License for more details.
1164
1165							=cut