File Coverage

lib/Text/FixedWidth/Helper.pm
Criterion Covered Total %
statement 117 117 100.0
branch 36 44 81.8
condition n/a
subroutine 11 11 100.0
pod 2 2 100.0
total 166 174 95.4


line stmt bran cond sub pod time code
1             package Text::FixedWidth::Helper;
2              
3             =head1 NAME
4              
5             Text::FixedWidth::Helper - Create or verify samples of fixed-width data
6              
7             =head1 SYNOPSIS
8              
9             use Text::FixedWidth::Helper qw( d2fw fw2d );
10              
11             $output = d2fw( $delimited_input_file, $fixed_width_output_file );
12             $output = fw2d( $fixed_width_output_file, $delimited_input_file );
13              
14             =head1 DESCRIPTION
15              
16             Preparation and verification of fixed-width data are often part of software
17             development projects. Because fixed-width data is more difficult for humans
18             to visually decode than character-delimited data, it is often difficult to
19             construct and/or verify small sample files. This library provides assistance
20             with that task.
21              
22             This library assumes that the user of a program can type a plain-text file in
23             a simple format and present it, perhaps via a GUI interface, to a Perl program
24             using the library.
25              
26             Two variations are possible: delimited-data-to-fixed-width (d2fw), and
27             fixed-width-to-delimited-data (fw2d). In each case, the plain-text file
28             consists of two parts: metadata and sample data. The metadata is a mapping of
29             data field names to the widths of those fields in the fixed-width record.
30             In each case, the subroutine handling the case is exported by this module on
31             demand only.
32              
33             =head2 Delimited data to fixed width (I<d2fw>)
34              
35             In I<d2fw>, the sample data consists of at
36             most 3 rows of data in a pipe-delimited format. The user presents the file to
37             the program, which then generates a second file which shows how those records
38             will look in a fixed-width format. Here is an example:
39              
40             fname 15
41             mi 1
42             lname 15
43             customer_id 10
44             city 20
45             state 2
46             zip 5
47              
48             Sylvester|J|Gomez|M789294592X|Rochester|NY|14618
49             Arthur|X|Fridrikkson|M783891590X|Oakland|CA|94601
50             Kasimir|E|Kristemanaczewski|N389182992X|Buffalo|NY|14214
51              
52             The user enters one data field per line: a string holding the field's name,
53             followed by one or more whitespace characters, followed by a number which is
54             the field's width in characters.
55              
56             The user then types one blank line to separate the metadata from the sample
57             data. The user then enters the sample data, one record per line, separating the
58             fields with pipe characters.
59              
60             When this file is run through the program, a second file is generated that
61             looks like this:
62              
63             12345678901234567890123456789012345678901234567890123456789012345678
64             | || | | | |
65             Sylvester JGomez M789294592Rochester NY14618
66             Arthur XFridrikkson M783891590Oakland CA94601
67             Kasimir EKristemanaczewsN389182992Buffalo NY14214
68              
69             The index row at the top has a length equal to the sum of the sizes of the
70             fixed-width fields. The second, or I<spacer>, row displays pipe characters at
71             the start of each fixed-width field. Finally, the data rows show how the data
72             will be positioned within a fixed-width record. In each field, data is
73             written flush-left and space-padded on the right. Data which exceeds the
74             allotted width for a field is truncated.
75              
76             While this format is very limited (I<e.g.,> it does not permit numerical
77             fields to be flushed-right or zero-padded on the left), it is sufficient for
78             visualization of sample data.
79              
80             =head2 Fixed width data to delimited (I<fw2d>)
81              
82             In I<fw2d>, the metadata is entered in the same way as in I<d2fw>. The sample data
83             consists of at most 3 rows of fixed-width data. The user presents the file to
84             the program, which then generates a second file which shows how those records
85             will look in a pipe-delimited format. Here is an example:
86              
87             fname 15
88             mi 1
89             lname 15
90             customer_id 10
91             city 20
92             state 2
93             zip 5
94              
95             Sylvester JGomez M789294592Rochester NY14618
96             Arthur XFridrikkson M783891590Oakland CA94601
97             Kasimir EKristemanaczewsN389182992Buffalo NY14214
98              
99             Output:
100              
101             Sylvester|J|Gomez|M789294592X|Rochester|NY|14618
102             Arthur|X|Fridrikkson|M783891590X|Oakland|CA|94601
103             Kasimir|E|Kristemanaczews|N389182992X|Buffalo|NY|14214
104              
105             Note that if data intended for a fixed-width field exceeded the field's
106             allotted width, it is truncated and therefore cannot be fully restored in a
107             delimited format.
108              
109             =cut
110              
111 2     2   289627 use strict;
  2         5  
  2         104  
112             BEGIN {
113 2     2   13 use Exporter ();
  2         4  
  2         47  
114 2     2   16 use vars qw($VERSION @ISA @EXPORT_OK);
  2         11  
  2         236  
115 2     2   5 $VERSION = '0.02';
116 2         54 @ISA = qw(Exporter);
117 2         581 @EXPORT_OK = qw( d2fw fw2d );
118             }
119 2     2   2732 use Data::Dumper;$Data::Dumper::Indent=1;
  2         16110  
  2         167  
120 2     2   20 use Carp;
  2         4  
  2         305  
121 2     2   11 use Cwd;
  2         3  
  2         138  
122 2     2   2832 use IO::File;
  2         3331  
  2         473  
123 2     2   13 use Scalar::Util qw( looks_like_number );
  2         4  
  2         4230  
124              
125             =head1 SUBROUTINES
126              
127             =head2 C<d2fw>
128              
129             =over 4
130              
131             =item * Purpose
132              
133             Given an input file with metadata (as described above) about data fields and
134             sample data in pipe-delimited format, generate an output file which displays
135             how that data will look in fixed-width format.
136              
137             =item * Arguments
138              
139             $output = d2fw( $delimited_input_file, $fixed_width_output_file );
140              
141             List of 2 elements, of which second element is optional: Strings holding name
142             of input file with delimited records and output file with fixed-width records.
143              
144             If a value is not supplied for the second argument, the name of the output
145             file will default to that of the input file appended by C<.out>.
146              
147             =item * Return Value
148              
149             String holding path to output file.
150              
151             =back
152              
153             =cut
154              
155             sub d2fw {
156 6     6 1 21698 my ( $input, $output ) = @_;
157 6 100       1439 croak "Could not locate input file $input"
158             unless (-f $input);
159 5 100       20 unless ($output) {
160 2         8 $output = "$input.out";
161             }
162 5         11 my $metadata_seen = 0;
163 5         16 my $sample_records_seen = 0;
164 5         7 my @metadata;
165 5         13 my $templ = '';
166 5         13 my $datastr = '';
167 5         8 my $sum = 0;
168 5         50 my $DATA = IO::File->new($input, 'r');
169 5 50       819 croak unless defined $DATA;
170 5         85 while (my $l = <$DATA>) {
171 44         57 chomp $l;
172 44         160 $l =~ s/\s+$//;
173 44 100       174 if ($l =~ m/^\s*$/) {
    100          
174 4 100       228 croak "Text::FixedWidth::Helper restricts records to 1000 characters"
175             if $sum > 1000;
176 3         9 foreach my $el (@metadata) {
177 21         39 $templ .= 'A' . $el->[1];
178             }
179 3         16 $metadata_seen++;
180             }
181             elsif (! $metadata_seen) {
182 30         101 my @config = split /\s+/, $l, 2;
183 30 100       304 croak "In metadata section, value of $config[0] must be numeric"
184             unless looks_like_number $config[1];
185 29         79 push @metadata, [ $config[0] => $config[1] ];
186 29         127 $sum += $config[1];
187             }
188             else {
189 10 100       29 if ($sample_records_seen >= 3) {
190 1         215 carp "Text::FixedWidth::Helper restricts you to 3 input records";
191 1         3 last;
192             }
193 9         48 my @record = split /\|/, $l, -1;
194 9         47 my $outstr = pack($templ => @record);
195 9         20 $datastr .= "$outstr\n";
196 9         2606 $sample_records_seen++;
197             }
198             }
199 3 50       33 $DATA->close() or croak "Unable to close $input after reading";
200              
201 3         75 my $mod = $sum % 10;
202 3         9 my $dec = int($sum / 10);
203 3         23 my $OUT = IO::File->new($output, 'w');
204 3 50       560 croak "Could not open $output for writing" unless defined $OUT;
205 3         41 print $OUT "1234567890" for (1 .. $dec);
206 3         34 print $OUT $_ for (1 .. $mod);
207 3         8 print $OUT "\n";
208 3         5 my $spacer = '';
209 3         7 foreach my $el (@metadata) {
210 21         24 $spacer .= '|';
211 21         54 $spacer .= ' ' x ($el->[1] - 1);
212             }
213 3         10 print $OUT "$spacer\n";
214 3         35 print $OUT $datastr;
215 3 50       11 $OUT->close() or croak "Unable to close $output after writing";
216 3         186 return $output;
217             }
218              
219             =head2 C<fw2d>
220              
221             =over 4
222              
223             =item * Purpose
224              
225             Given an input file with metadata (as described above) about data fields and
226             sample data in fixed-width format, generate an output file which displays
227             how that data will look in pipe-delimited format.
228              
229             =item * Arguments
230              
231             $output = fw2d( $fixed_width_output_file, $delimited_input_file );
232              
233             =item * Return Value
234              
235             String holding path to output file.
236              
237             =back
238              
239             =cut
240              
241             sub fw2d {
242 6     6 1 39395 my ( $input, $output ) = @_;
243 6 100       1075 croak "Could not locate input file $input"
244             unless (-f $input);
245 5 100       28 unless ($output) {
246 2         9 $output = "$input.out";
247             }
248 5         14 my $metadata_seen = 0;
249 5         13 my $sample_records_seen = 0;
250 5         11 my @metadata;
251 5         12 my $templ = '';
252 5         16 my @delimited_records = ();
253 5         13 my $sum = 0;
254 5         57 my $DATA = IO::File->new($input, 'r');
255 5 50       851 croak unless defined $DATA;
256 5         384 while (my $l = <$DATA>) {
257 44         59 chomp $l;
258 44         119 $l =~ s/\s+$//;
259 44 100       172 if ($l =~ m/^\s*$/) {
    100          
260 4 100       968 croak "Text::FixedWidth::Helper restricts records to 1000 characters"
261             if $sum > 1000;
262 3         9 foreach my $el (@metadata) {
263 21         41 $templ .= 'A' . $el->[1];
264             }
265 3         12 $metadata_seen++;
266             }
267             elsif (! $metadata_seen) {
268 30         279 my @config = split /\s+/, $l, 2;
269 30 100       305 croak "In metadata section, value of $config[0] must be numeric"
270             unless looks_like_number $config[1];
271 29         73 push @metadata, [ $config[0] => $config[1] ];
272 29         112 $sum += $config[1];
273             }
274             else {
275 10 100       21 if ($sample_records_seen >= 3) {
276 1         247 carp "Text::FixedWidth::Helper restricts you to 3 input records";
277 1         3 last;
278             }
279 9         76 my @record = unpack($templ => $l);
280 9         17 my @parsed_record = ();
281 9         23 for (my $f = 0; $f <= $#record; $f++) {
282 63         402 push @parsed_record, [ $metadata[$f]->[0], $record[$f] ];
283             }
284 9         14 push @delimited_records, \@parsed_record;
285 9         83 $sample_records_seen++;
286             }
287             }
288 3 50       32 $DATA->close() or croak "Unable to close $input after reading";
289              
290 3         70 my $OUT = IO::File->new($output, 'w');
291 3 50       610 croak "Could not open $output for writing"
292             unless defined $OUT;
293 3         12 foreach my $record (@delimited_records) {
294 9         13 foreach my $field (@{$record}) {
  9         15  
295 63         263 print $OUT "$field->[0]|$field->[1]\n";
296             }
297 9         23 print $OUT "\n";
298             }
299              
300 3 50       35 $OUT->close() or croak "Unable to close $output after writing";
301 3         212 return $output;
302             }
303              
304             1;
305              
306             #################### DOCUMENTATION ###################
307              
308             =head1 AUTHOR
309              
310             James E Keenan
311             CPAN ID: jkeenan
312             jkeenan@cpan.org
313             http://thenceforward.net/perl/modules/Text-FixedWidth-Helper
314              
315             Thanks to Natasha Salam for describing the need for this functionality.
316              
317             =head1 COPYRIGHT
318              
319             This program is free software; you can redistribute
320             it and/or modify it under the same terms as Perl itself.
321              
322             The full text of the license can be found in the
323             LICENSE file included with this module.
324              
325             =head1 SEE ALSO
326              
327             perl(1).
328              
329             =cut