File Coverage

blib/lib/List/RewriteElements.pm
Criterion Covered Total %
statement 125 125 100.0
branch 49 52 94.2
condition 32 36 88.8
subroutine 21 21 100.0
pod 10 10 100.0
total 237 244 97.1


line stmt bran cond sub pod time code
1             package List::RewriteElements;
2             #$Id: RewriteElements.pm 1123 2007-01-23 03:39:35Z jimk $
3             $VERSION = 0.09;
4 7     7   8789 use strict;
  7         17  
  7         494  
5 7     7   42 use warnings;
  7         25  
  7         243  
6 7     7   53 use Carp;
  7         13  
  7         598  
7 7     7   40 use Cwd;
  7         21  
  7         503  
8 7     7   42 use File::Basename;
  7         13  
  7         647  
9 7     7   6958 use File::Copy;
  7         19714  
  7         431  
10 7     7   43 use File::Spec;
  7         12  
  7         126  
11 7     7   9273 use Tie::File;
  7         181316  
  7         10261  
12              
13             sub new {
14 32     32 1 402030 my ($class, $argsref) = @_;
15 32 100       375 croak "Hash ref passed to constructor must contain 'body_rule' element"
16             unless defined $argsref->{body_rule};
17 31 100       292 croak "'body_rule' element value must be a code ref"
18             unless ref($argsref->{body_rule}) eq 'CODE';
19 30 100 100     394 croak "Hash ref passed to constructor must have either a 'file' element or a 'list' element"
20             unless (defined $argsref->{file} or defined $argsref->{list});
21 29 100 100     479 croak "'file' element passed to constructor not located"
22             if (defined $argsref->{file} and not -f $argsref->{file});
23 28 100 100     555 croak "'list' element passed to constructor must be array ref"
      66        
24             if ( defined $argsref->{list} and
25             (
26             (not ref($argsref->{list})) or
27             (ref($argsref->{list}) ne 'ARRAY')
28             )
29             );
30 26 100 100     359 croak "'body_suppress' element passed to constructor must be code ref"
      66        
31             if ( defined $argsref->{body_suppress} and
32             (
33             (not ref($argsref->{body_suppress})) or
34             (ref($argsref->{body_suppress}) ne 'CODE')
35             )
36             );
37 24 100 100     505 croak "'header_rule' element passed to constructor must be code ref"
      66        
38             if ( defined $argsref->{header_rule} and
39             (
40             (not ref($argsref->{header_rule})) or
41             (ref($argsref->{header_rule}) ne 'CODE')
42             )
43             );
44 22 100 100     1124 croak "If 'header_suppress' criterion is supplied, a 'header_rule' element must be supplied as well"
45             if ( defined $argsref->{header_suppress} and
46             ! defined $argsref->{header_rule}
47             );
48 21 100 100     357 croak "'header_suppress' element passed to constructor must be code ref"
      66        
49             if ( defined $argsref->{header_suppress} and
50             (
51             (not ref($argsref->{header_suppress})) or
52             (ref($argsref->{header_suppress}) ne 'CODE')
53             )
54             );
55              
56 19 100       73 if ($argsref->{file}) {
57 5         14 my @elements;
58 5 50       108 tie @elements, 'Tie::File', $argsref->{file}, recsep => $/
59             or croak "Unable to tie to $argsref->{file}";
60 5         1639 $argsref->{working} = \@elements;
61             } else {
62 14         56 $argsref->{working} = $argsref->{list};
63             }
64              
65 19         130 my $self = bless ($argsref, $class);
66              
67 19         29 $self->{rows_in} = scalar(@{$self->{working}});
  19         133  
68 19 100       1434 if (defined $self->{header_rule}) {
69 8         29 $self->{records_in} = $self->{rows_in} - 1;
70             } else {
71 11         45 $self->{records_in} = $self->{rows_in};
72             }
73             # Next attributes are initialized to empty strings because their value
74             # is not fixed until after generate_output() has been called.
75 19         70 $self->{output_path} = q{};
76 19         70 $self->{output_basename} = q{};
77             # Next attributes are initialized to zero because their value
78             # is not fixed until after generate_output() has been called.
79 19         51 $self->{rows_out} = 0;
80 19         41 $self->{records_out} = 0;
81 19         45 $self->{records_changed} = 0;
82 19         42 $self->{records_unchanged} = 0;
83 19         47 $self->{records_deleted} = 0;
84 19         78 return $self;
85             }
86              
87             sub generate_output {
88 19     19 1 17258 my $self = shift;
89 19 100 100     199 if ( ! # print to STDOUT
90             (
91             defined $self->{output_file} or
92             defined $self->{output_suffix}
93             )
94             ) {
95 11         42 $self->_handler_control();
96             } else { # print to file
97 8         44 my $outfile;
98 8 100       73 if (defined $self->{output_file}) {
99 7         35 $outfile = $self->{output_file};
100             } else {
101 1         13380 $outfile = File::Spec->catfile( ( cwd() ),
102             basename($self->{file}) . $self->{output_suffix} );
103             }
104 8 50       1443 open my $OUT, ">$outfile"
105             or croak "Unable to open $outfile for writing";
106 8         82 my $oldfh = select($OUT);
107 8         63 $self->_handler_control();
108 8 50       1483 close $OUT
109             or croak "Unable to close $outfile after writing";
110 8         42 select $oldfh;
111 8         40 $self->{output_path} = $outfile;
112 8         929 $self->{output_basename} = basename($self->{output_path});
113             }
114 19         235 $self->{records_out} = $self->{records_in} - $self->{records_deleted};
115 19         50 $self->{records_unchanged} =
116             $self->{records_out} - $self->{records_changed};
117 19 100       237 if (! defined $self->{header_rule}) {
118 11         42 $self->{rows_out} = $self->{records_out};
119             } else {
120 8 100       34 if ($self->{header_status} != -1) {
121 6         20 $self->{rows_out} = $self->{records_out} + 1;
122             } else {
123 2         7 $self->{rows_out} = $self->{records_out};
124             }
125             }
126             }
127              
128             sub _handler_control {
129 19     19   43 my $self = shift;
130 19 100       72 if (! defined $self->{header_rule}) {
131 11         94 $self->_body_rule_handler();
132             } else {
133 8         33 $self->_header_body_rule_handler();
134             }
135             }
136              
137             sub _body_rule_handler {
138 19     19   35 my $self = shift;
139 19         136 RECORD: foreach my $el (@{$self->{working}}) {
  19         114  
140 178         1214 chomp $el;
141 178 100       14926 if (defined $self->{body_suppress}) {
142 40 100       40 unless (defined (&{$self->{body_suppress}}($el))) {
  40         108  
143 6         40 $self->{records_deleted}++;
144 6         21 next RECORD;
145             }
146             }
147 172         393 my $newel = &{$self->{body_rule}}($el);
  172         561  
148 172         2458 print "$newel\n";
149 172 100       6520 $self->{records_changed}++ if $el ne $newel;
150             }
151             }
152              
153             sub _header_body_rule_handler {
154 8     8   17 my $self = shift;
155 8         69 $self->{header_status} = 0; # header present, as yet unchanged
156 8         13 my $header = shift(@{$self->{working}});
  8         28  
157 8         327 chomp $header;
158 8 100       31 if (defined $self->{header_suppress}) {
159 5 100       13 if (defined (&{$self->{header_suppress}}($header))) {
  5         20  
160 3         24 my $newheader = &{$self->{header_rule}}($header);
  3         11  
161 3         44 print "$newheader\n";
162 3 100       22 $self->{header_status} = 1 if $header ne $newheader;
163             # header changed
164             } else {
165 2         17 $self->{header_status} = -1; # header suppressed
166             }
167             } else {
168 3         4 my $newheader = &{$self->{header_rule}}($header);
  3         11  
169 3         49 print "$newheader\n";
170 3 100       22 $self->{header_status} = 1 if $header ne $newheader;
171             # header changed
172             }
173 8         33 $self->_body_rule_handler();
174             }
175              
176             sub get_output_path {
177 2     2 1 654 my $self = shift;
178 2         30 return $self->{output_path};
179             }
180              
181             sub get_output_basename {
182 3     3 1 91 my $self = shift;
183 3         105 return $self->{output_basename};
184             }
185              
186             sub get_total_rows {
187 13     13 1 5363 my $self = shift;
188 13         84 return $self->{rows_out};
189             }
190              
191             sub get_total_records {
192 13     13 1 32 my $self = shift;
193 13         74 return $self->{records_out};
194             }
195              
196             sub get_records_changed {
197 13     13 1 37 my $self = shift;
198 13         76 return $self->{records_changed};
199             }
200              
201             sub get_records_unchanged {
202 13     13 1 34 my $self = shift;
203 13         83 return $self->{records_unchanged};
204             }
205              
206             sub get_records_deleted {
207 13     13 1 943 my $self = shift;
208 13         112 return $self->{records_deleted};
209             }
210              
211             sub get_header_status {
212 10     10 1 21 my $self = shift;
213 10         76 return $self->{header_status};
214             }
215              
216             1;
217              
218              
219             #################### DOCUMENTATION ###################
220              
221             =head1 NAME
222              
223             List::RewriteElements - Create a new list by rewriting elements of a first list
224              
225             =head1 SYNOPSIS
226              
227             use List::RewriteElements;
228              
229             =head2 Constructor
230              
231             Simplest case: Input from array, output to STDOUT.
232              
233             $lre = List::RewriteElements->new( {
234             list => \@source,
235             body_rule => sub {
236             my $record = shift;
237             $record .= q{additional field};
238             },
239             } );
240              
241             Input from file, output to STDOUT:
242              
243             $lre = List::RewriteElements->new( {
244             file => "/path/to/source/file",
245             body_rule => sub {
246             my $record = shift;
247             $record .= q{,additional field};
248             },
249             } );
250              
251             Provide a different rule for the first element in the list:
252              
253             $lre = List::RewriteElements->new( {
254             file => "/path/to/source/file",
255             header_rule => sub {
256             my $record = shift;
257             $record .= q{,ADDITIONAL HEADER};
258             },
259             body_rule => sub {
260             my $record = shift;
261             $record .= q{,additional field};
262             },
263             } );
264              
265             Input from file, output to file:
266              
267             $lre = List::RewriteElements->new( {
268             file => "/path/to/source/file",
269             body_rule => sub {
270             my $record = shift;
271             $record .= q{additional field};
272             },
273             output_file => "/path/to/output/file",
274             } );
275              
276             To name output file, just provide a suffix to filename:
277              
278             $lre = List::RewriteElements->new( {
279             file => "/path/to/source/file",
280             body_rule => sub {
281             my $record = shift;
282             $record .= q{additional field};
283             },
284             output_suffix => '.out',
285             } );
286              
287             Provide criteria to suppress output of header or individual record.
288              
289             $lre = List::RewriteElements->new( {
290             file => "/path/to/source/file",
291             header_suppress => sub {
292             my $record = shift;
293             return if $record =~ /$somepattern/;
294             },
295             body_suppress => sub {
296             my $record = shift;
297             return if $record ne 'somestring';
298             },
299             body_rule => sub {
300             my $record = shift;
301             $record .= q{additional field};
302             },
303             } );
304              
305             =head2 Generate Output
306              
307             $lre->generate_output();
308              
309             =head2 Report Output Information
310              
311             $path_to_output_file = $lre->get_output_path();
312              
313             $output_file_basename = $lre->get_output_basename();
314              
315             $output_row_count = $lre->get_total_rows();
316              
317             $output_record_count = $lre->get_total_records();
318              
319             $records_changed = $lre->get_records_changed();
320              
321             $records_unchanged = $lre->get_records_unchanged();
322              
323             $records_deleted = $lre->get_records_deleted();
324              
325             $header_status = $lre->get_header_status();
326              
327             =head1 DESCRIPTION
328              
329             It is common in many situations for you to receive a flat data file from someone
330             else and have to generate a new file in which each row or record in the
331             incoming file must either (a) be transformed according to some rule before
332             being printing to the new file; or (b) if it meets certain criteria, not output to the new file at all.
333              
334             List::RewriteElements enables you to write such rules and criteria, generate
335             the file of transformed data records, and get back some basic statistics about
336             the transformation.
337              
338             List::RewriteElements is useful when the number of records in the incoming
339             file may be large and you do not want to hold the entire list in memory.
340             Similarly, the newly generated records are not held in memory but are
341             immediately Ced to STDOUT or to file.
342              
343             On the other hand, if for some reason you already have an array of records in
344             memory, you can use List::RewriteElements to apply rules and criteria to each
345             element of the array and then print the transformed records (again, without
346             holding the output in memory).
347              
348             =head1 SUBROUTINES
349              
350             =head2 C
351              
352             B List::RewriteElements constructor.
353              
354             B Reference to a hash holding the following keys:
355              
356             =over 4
357              
358             =item * C or C
359              
360             The hash must hold either a C element or a C element -- but not
361             both! The value for the C key must be an absolute path to an input
362             file. The value for C must be a reference to an array in memory.
363              
364             =item * C
365              
366             The hash must have a C element whose value is a reference to a
367             subroutine providing a formula for the transformation of an individual record
368             in the incoming file to a record in the outgoing file. The first argument
369             passed to this subroutine must be the record from the incoming file. The
370             return value from this subroutine should be a string immediately ready for
371             printing to the output file (though the string should not end in a newline, as
372             printing will be handled by C).
373              
374             =item * C
375              
376             Optionally, you may provide a C element whose value is a
377             reference to a subroutine providing a criterion according to which an
378             individual record in the incoming file should be output to the outgoing file
379             or not output, I, omitted from the output entirely. The first argument
380             to this subroutine should be the record from the incoming file. The
381             subroutine should, at least implicitly, return a true value when the record
382             I be output. The subroutine should simply C, ,
383             return an implicit C, when the record should be omitted from the
384             outgoing file.
385              
386             =item * C
387              
388             Frequently the first row in a flat data file is a header row containing, say,
389             the names of the columns in a data table, joined by a delimiter. Because the
390             header row is different from all subsequent rows, you may optionally provide a
391             C element whose value is a reference to a
392             subroutine providing a formula for the transformation of the header row
393             in the incoming file to the header in the outgoing file. The first argument
394             passed to this subroutine must be the header row from the incoming file. The
395             return value from this subroutine should be a string immediately ready for
396             printing to the output file (though the string should not end in a newline, as
397             printing will be handled by C).
398              
399             =item * C
400              
401             Optionally, if you have provided a C element, you may provide
402             a C element whose value is a
403             reference to a subroutine providing a criterion according to which an
404             the header row from the incoming file should be output to the outgoing file
405             or not output, I, omitted from the output entirely. The first argument
406             to this subroutine should be the header from the incoming file. The
407             subroutine should, at least implicitly, return a true value when the header
408             I be output. The subroutine should simply C, ,
409             return an implicit C, when the header should be omitted from the
410             outgoing file.
411              
412             =item * C or C
413              
414             It is recommended that you supply either an C or an
415             C element to the constructor; otherwise, the new list generated
416             by application of the rules and criteria will simply C to C.
417             The value of an C element should be a full path to the newly
418             created file. If you wish to create a new file name without specifying a full
419             path but simply by tacking on a suffix to the name of the incoming file,
420             provide an C element and the outgoing file will be created in
421             the directory which is the I as of the point where
422             C is called. An C element will
423             be ignored if an C element is provided.
424              
425             =item * Note 1
426              
427             If neither a C or C element is provide to the
428             constructor, List::RewriteElements will treat the first row of the incoming
429             file the same as any other row, C, it will apply the C
430             transformation formula.
431              
432             =item * Note 2
433              
434             A C or C criterion, if present, will be
435             logically applied I any C or C formula. We
436             don't apply the formula to transform a record if the record should not be
437             output at all.
438              
439             =item * Note 3
440              
441             =back
442              
443             B List::RewriteElements object.
444              
445             =head2 C
446              
447             B Generates the output specified by arguments to C,
448             I, creates an output file or Cs to C with records
449             transformed as per those arguments.
450              
451             B None.
452              
453             B Returns true value upon success. In case of failure it will
454             C with some error message.
455              
456             =head2 C
457              
458             B Get the full path to the newly created output file.
459              
460             B None.
461              
462             B String holding path to newly created output file.
463              
464             B Since use of the C attribute means that the full
465             path to the output file will not be known until C has been
466             called, C will only give a meaningful result once
467             C has been called. Otherwise, it will default to an empty
468             string.
469              
470             =head2 C
471              
472             B Get only the basename of the newly created output file.
473              
474             B None.
475              
476             B String holding basename of newly created output file.
477              
478             B Since use of the C attribute means that the full
479             path to the output file will not be known until C has been
480             called, C will only give a meaningful result once
481             C has been called. Otherwise, it will default to an empty
482             string.
483              
484             =head2 C
485              
486             B Get the total number of rows in the newly created output file.
487             This will include any header row.
488              
489             B None.
490              
491             B Nonnegative integer.
492              
493             =head2 C
494              
495             B Get the total number of data records in the newly created output
496             file. If a header row is present in that file, C will
497             return a value C<1> less than that returned by C.
498              
499             B None.
500              
501             B Nonnegative integer.
502              
503             =head2 C
504              
505             B Get the number of data records in the newly created output file
506             that are altered versions of records in the incoming file. This value does
507             not include changes in the header row.
508              
509             B None.
510              
511             B Nonnegative integer.
512              
513             =head2 C
514              
515             B Get the number of data records in the newly created output file
516             that are unaltered versions of records in the incoming file. This value does
517             not include changes in the header row.
518              
519             B None.
520              
521             B Nonnegative integer.
522              
523             =head2 C
524              
525             B Get the number of data records in the original source (file or
526             list) that were omitted from the newly created output file due to application
527             of a C criterion. This value does not include any suppression
528             of a header row following application of a C criterion.
529              
530             B None.
531              
532             B Nonnegative integer.
533              
534             =head2 C
535              
536             B Indicate whether any header row in the original source (file or
537             list)
538              
539             =over 4
540              
541             =item *
542              
543             was rewritten in the newly created output file: return value C<1>;
544              
545             =item *
546              
547             was transferred to the newly created output file without alteration: return
548             value C<0>;
549              
550             =item *
551              
552             was suppressed from appearing in the output file by application of a
553             C criterion: return value C<-1>;
554              
555             =item *
556              
557             no header row in the source: return value C.
558              
559             =back
560              
561             B None.
562              
563             B Numerical flag: C<1>, C<0>, C<-1> or C as described
564             above.
565              
566             =head1 FAQ
567              
568             =head2 Can I simultaneously rewrite records and interact with the external environment?
569              
570             Yes. If a C, C, C or C
571             either (a) needs additional information from the external environment above
572             and beyond that contained in the individual data record or (b) needs to cause
573             a change in the external environment, you can write a closure and call that
574             closure insider the rule.
575              
576             Example:
577              
578             my @greeks = qw( alpha beta gamma );
579            
580             my $get_a_greek = sub {
581             return (shift @greeks);
582             };
583              
584             my $lre = List::RewriteElements->new ( {
585             list => [ map {"$_\n"} (1..5) ],
586             body_rule => sub {
587             my $record = shift;
588             my $rv;
589             chomp $record;
590             if ($record eq '4') {
591             $rv = &{$get_a_greek};
592             } else {
593             $rv = (10 * $record);
594             }
595             return $rv;
596             },
597             body_suppress => sub {
598             my $record = shift;
599             chomp $record;
600             return if $record eq '5';
601             },
602             } );
603              
604             $lre->generate_output();
605              
606             This will produce:
607              
608             10
609             20
610             30
611             alpha
612              
613             =head2 Can I use List-Rewrite Elements with fixed-width data?
614              
615             Yes. Suppose that you have this fixed-width data (adapted from Dave Cross'
616             I):
617              
618             my @dataset = (
619             q{00374Bloggs & Co 19991105100103+00015000},
620             q{00375Smith Brothers 19991106001234-00004999},
621             q{00376Camel Inc 19991107289736+00002999},
622             q{00377Generic Code 19991108056789-00003999},
623             );
624              
625             Suppose further that you need to update certain records and that C<%revisions>
626             holds the data for updating:
627              
628             my %revisions = (
629             376 => [ 'Camel Inc', 20061107, 388293, '+', 4999 ],
630             377 => [ 'Generic Code', 20061108, 99821, '-', 6999 ],
631             );
632              
633             Write a C subroutine which uses C, C and C
634             as needed to update the records.
635              
636             my $lre = List::RewriteElements->new ( {
637             list => \@dataset,
638             body_rule => sub {
639             my $record = shift;
640             my $template = 'A5A18A8A6AA8';
641             my @rec = unpack($template, $record);
642             $rec[0] =~ s/^0+//;
643             my ($acctno, %values, $result);
644             $acctno = $rec[0];
645             $values{$acctno} = [ @rec[1..$#rec] ];
646             if ($revisions{$acctno}) {
647             $values{$acctno} = $revisions{$acctno};
648             }
649             $result = sprintf "%05d%-18s%8d%06d%1s%08d",
650             ($acctno, @{$values{$acctno}});
651             return $result;
652             },
653             } );
654              
655             =head2 How does this differ from Tie::File?
656              
657             Mark Jason Dominus' Tie::File module is one of my Fave 5 CPAN modules. It's
658             excellent for modifying a file in place. But I frequently have to leave the
659             source file unmodified and create a new file, which implies, at the very
660             least, opening, printing to, and closing filehandles in addition to using
661             Tie::File. List::RewriteElements hides all
662             that. It also provides the statistical report methods.
663              
664             =head2 Couldn't I do this with C and C?
665              
666             Quite possibly. But if your rules and criteria were complicated or long, the
667             content of the C and C C<{}> blocks would be hard to read. You
668             also wouldn't get the statistical report methods.
669              
670             =head2 How Does It Work?
671              
672             Why do you care? Why do you want to look inside the black box? If you really
673             want to know, read the source!
674              
675             =head1 PREREQUISITES
676              
677             List::RewriteElements relies only on modules distributed with the Perl core as
678             of 5.8.0. IO::Capture::Stdout is required for the test suite, but a copy is
679             included in the distribution under the F directory.
680              
681             =head1 BUGS
682              
683             None known at this time. File bug reports at L.
684              
685             =head1 HISTORY
686              
687             0.09 Mon Jan 22 22:35:56 EST 2007
688             - Update version number and release date only. Purpose: generate new
689             round of tests by cpan testers, in the hope that it eliminates a FAIL report
690             on v0.08 where failure was due solely to error on tester's box.
691              
692             0.08 Mon Jan 1 08:54:01 EST 2007
693             - xdg to the rescue! Applied and extended patches supplied by David
694             Golden for Win32. In constructor, value of C<$/> is supplied to the C
695             option.
696              
697             0.07 Sun Dec 31 11:13:04 EST 2006
698             - Switched to using File::Spec::catfile() to generate one path (rather
699             than Cwd::realpath(). This was done in an attempt to respond to corion's FAIL
700             reports (but I don't have a good Windows box, so I can't be certain of the
701             results).
702              
703             0.06 Sat Dec 16 11:31:38 EST 2006
704             - Created t/07_fixed_width.t and t/testlib/fixed.t to illustrate use of
705             List::RewriteElements with fixed-width data.
706              
707             0.05 Thu Dec 14 07:42:24 EST 2006
708             - Correction of POD formatting errors only; no change in functionality.
709             CPAN upload.
710              
711             0.04 Wed Dec 13 23:04:33 EST 2006
712             - More tests; fine-tuning of code and documentation. First CPAN upload.
713              
714             0.03 Tue Dec 12 22:13:00 EST 2006
715             - Implementation of statistical methods; more tests.
716              
717             0.02 Mon Dec 11 19:38:26 EST 2006
718             - Added tests to demonstrate use of closures to supply additional
719             information to elements such as body_rule.
720              
721             0.01 Sat Dec 9 22:29:51 2006
722             - original version; created by ExtUtils::ModuleMaker 0.47
723              
724             =head1 ACKNOWLEDGEMENTS
725              
726             Thanks to David Landgren for raising the question of use of
727             List-RewriteElements with fixed-width data.
728              
729             I then adapted an example from Dave Cross' I,
730             Chapter 7.1, "Fixed-width Data," to provide a test
731             demonstrating processing of fixed-width data.
732              
733             =head1 AUTHOR
734              
735             James E Keenan. CPAN ID: JKEENAN. jkeenan@cpan.org.
736             http://search.cpan.org/~jkeenan/ or
737             http://thenceforward.net/perl/modules/List-RewriteElements.
738              
739             =head1 COPYRIGHT
740              
741             Copyright 2006 James E Keenan (USA).
742              
743             This program is free software; you can redistribute
744             it and/or modify it under the same terms as Perl itself.
745              
746             The full text of the license can be found in the
747             LICENSE file included with this module.
748              
749             =head1 SEE ALSO
750              
751             David Cross, I (Manning, 2001).
752              
753             =cut
754              
755