File Coverage

blib/lib/Text/CSV/Pivot.pm
Criterion Covered Total %
statement 96 99 96.9
branch 20 28 71.4
condition 26 29 89.6
subroutine 9 9 100.0
pod 1 2 50.0
total 152 167 91.0


line stmt bran cond sub pod time code
1             package Text::CSV::Pivot;
2              
3             $Text::CSV::Pivot::VERSION = '0.09';
4             $Text::CSV::Pivot::AUTHORITY = 'cpan:MANWAR';
5              
6             =head1 NAME
7              
8             Text::CSV::Pivot - Transform CSV file into Pivot Table format.
9              
10             =head1 VERSION
11              
12             Version 0.09
13              
14             =cut
15              
16 4     4   122225 use strict; use warnings;
  4     4   26  
  4         94  
  4         19  
  4         6  
  4         84  
17 4     4   63 use 5.008;
  4         12  
18 4     4   1740 use Text::CSV;
  4         57178  
  4         176  
19 4     4   27 use File::Basename;
  4         9  
  4         3798  
20              
21             sub new {
22 4     4 0 8371 my ($class, @args) = @_;
23              
24 4 50 33     28 die "ERROR: Parameters have to be hashref."
25             if (@args == 1 && ((ref $args[0]) ne 'HASH'));
26              
27 4         11 my @required_keys = qw(input_file col_key_idx col_name_idx col_value_idx);
28 4         6 my @optional_keys = qw(output_file col_skip_idx);
29              
30 4         7 my $self = {};
31 4         7 foreach my $key (@required_keys) {
32 16         21 my $val = $args[0]->{$key};
33 16 50       21 die "ERROR: Missing required key '$key'." unless (defined $val);
34 16         29 $self->{$key} = delete $args[0]->{$key};
35             }
36 4         8 foreach my $key (@optional_keys) {
37 8         10 my $val = $args[0]->{$key};
38 8 100       14 if (defined $val) {
39 5         10 $self->{$key} = delete $args[0]->{$key};
40             }
41             }
42              
43 4 100       12 $self->{col_skip_idx} = [] unless (exists $self->{col_skip_idx});
44 4 50       11 $args[0]->{binary} = 1 unless (exists $args[0]->{binary});
45              
46 4 50       16 $self->{'_csv_handler'} = Text::CSV->new($args[0])
47             or die "ERROR: Can't use CSV: ".Text::CSV->error_diag;
48              
49 4         370 my $input_file = $self->{'input_file'};
50 4 50   1   119 open(my $in, '<:encoding(utf8)', $input_file)
  1         8  
  1         7  
  1         9  
51             or die "ERROR: Can't open input file $input_file [$!]\n";
52 4         12761 $self->{'input_file'} = $in;
53              
54 4         9 my $output_file = $self->{'output_file'};
55 4 50       8 if (!defined $output_file) {
56 0         0 $output_file = fileparse($input_file, '\.[^\.]*');
57 0         0 $output_file = sprintf("%s.pivot.csv", $output_file);
58             }
59              
60 4 50       155 open(my $out, '>:encoding(utf8)', $output_file)
61             or die "ERROR: Can't open output file $output_file [$!]\n";
62 4         150 $self->{'output_file'} = $out;
63              
64 4         138 $self->{'_old_columns'} = $self->{'_csv_handler'}->getline($in);
65              
66 4         182 bless $self, $class;
67              
68 4         13 $self->_process_data;
69              
70 4         18 return $self;
71             }
72              
73              
74             =head1 DESCRIPTION
75              
76             Recently I was asked to prepare pivot table using csv file at work. Having done
77             that using quick and dirty perl script, I decided to clean up and make it generic
78             so that others can also benefit.
79              
80             Below is sample data, I used for prototype as source csv file.
81              
82             +----------------+-----------+--------+--------+
83             | Student | Subject | Result | Year |
84             +----------------+-----------+--------+--------+
85             | Smith, John | Music | 7.0 | Year 1 |
86             | Smith, John | Maths | 4.0 | Year 1 |
87             | Smith, John | History | 9.0 | Year 1 |
88             | Smith, John | Language | 7.0 | Year 1 |
89             | Smith, John | Geography | 9.0 | Year 1 |
90             | Gabriel, Peter | Music | 2.0 | Year 1 |
91             | Gabriel, Peter | Maths | 10.0 | Year 1 |
92             | Gabriel, Peter | History | 7.0 | Year 1 |
93             | Gabriel, Peter | Language | 4.0 | Year 1 |
94             | Gabriel, Peter | Geography | 10.0 | Year 1 |
95             +----------------+-----------+--------+--------+
96              
97             I aim to get something like this below.
98              
99             +----------------+--------+-----------+---------+----------+-------+-------+
100             | Student | Year | Geography | History | Language | Maths | Music |
101             +----------------+--------+-----------+---------+----------+-------+-------+
102             | Gabriel, Peter | Year 1 | 10.0 | 7.0 | 4.0 | 10.0 | 2.0 |
103             | Smith, John | Year 1 | 9.0 | 9.0 | 7.0 | 4.0 | 7.0 |
104             +----------------+--------+-----------+---------+----------+-------+-------+
105              
106             With the help of L, I came up with the following solution.
107              
108             use strict; use warnings;
109             use Text::CSV::Pivot;
110              
111             Text::CSV::Pivot->new({ input_file => 'sample.csv',
112             col_key_idx => 0,
113             col_name_idx => 1,
114             col_value_idx => 2 })->transform;
115              
116             After executing the above code, I got the expected result in C.
117              
118             =head1 SYNOPSIS
119              
120             Let's assume we have the following source csv file (sample.csv):
121              
122             +----------------+-----------+-----------------+
123             | Student | Subject | Result | Year |
124             +----------------+-----------+--------+--------+
125             | Smith, John | Music | 7.0 | Year 1 |
126             | Smith, John | Maths | 4.0 | Year 1 |
127             | Smith, John | History | 9.0 | Year 1 |
128             | Smith, John | Geography | 9.0 | Year 1 |
129             | Gabriel, Peter | Music | 2.0 | Year 1 |
130             | Gabriel, Peter | Maths | 10.0 | Year 1 |
131             | Gabriel, Peter | History | 7.0 | Year 1 |
132             | Gabriel, Peter | Language | 4.0 | Year 1 |
133             +----------------+-----------+--------+--------+
134              
135             If you notice, the student C<"Smith, John"> do not have any score for the subject
136             C<"Language"> and the student C<"Gabriel, Peter"> missing score for C<"Geography">.
137              
138             use strict; use warnings;
139             use Text::CSV::Pivot;
140              
141             Text::CSV::Pivot->new({ input_file => 'sample.csv',
142             col_key_idx => 0,
143             col_name_idx => 1,
144             col_value_idx => 2 })->transform;
145              
146             The above code would then create the result in C as below:
147              
148             +----------------+--------+-----------+---------+----------+-------+-------+
149             | Student | Year | Geography | History | Language | Maths | Music |
150             +----------------+--------+-----------+---------+----------+-------+-------+
151             | Gabriel, Peter | Year 1 | 10.0 | 7.0 | | 10.0 | 2.0 |
152             | Smith, John | Year 1 | | 9.0 | 7.0 | 4.0 | 7.0 |
153             +----------------+--------+-----------+---------+----------+-------+-------+
154              
155             In case, we would want to skip C<"Year"> column then the following code:
156              
157             use strict; use warnings;
158             use Text::CSV::Pivot;
159              
160             Text::CSV::Pivot->new({ input_file => 'sample.csv',
161             col_key_idx => 0,
162             col_name_idx => 1,
163             col_value_idx => 2,
164             col_skip_idx => [3] })->transform;
165              
166             You should get the result in C as below:
167              
168             +----------------+-----------+---------+----------+-------+-------+
169             | Student | Geography | History | Language | Maths | Music |
170             +----------------+-----------+---------+----------+-------+-------+
171             | Gabriel, Peter | 10.0 | 7.0 | | 10.0 | 2.0 |
172             | Smith, John | | 9.0 | 7.0 | 4.0 | 7.0 |
173             +----------------+-----------+---------+----------+-------+-------+
174              
175             =head1 CLI
176              
177             With the C or above, there is a CLI tool supplied with the
178             distribution called C.
179              
180             Usage: csv-pivot [OPTIONS]...
181              
182             OPTIONS:
183             -i, --i, -input-file, --input-file=s input file (required)
184             -o, --o, -output-file, --output-file=s output file (optional)
185             -k, --k, -col-key-idx, --col-key-idx=i key column index (required)
186             -n, --n, -col-name-idx, --col-name-idx=i name column index (required)
187             -v, --v, -col-value-idx, --col-value-idx=i value column index (required)
188             -s, --s, -col-skip-idx, --col-skip-idx=s comma separated skip column index (optional)
189             -h, --help print this message
190              
191             If you want to do something like below:
192              
193             use strict; use warnings;
194             use Text::CSV::Pivot;
195              
196             Text::CSV::Pivot->new({ input_file => 'sample.csv',
197             col_key_idx => 0,
198             col_name_idx => 1,
199             col_value_idx => 2,
200             col_skip_idx => [3] })->transform;
201              
202             Then this can be achieved using CLI C like below:
203              
204             $ cvs-pivot -i sample.csv -k 0 -n 1 -v 2 -s 3
205              
206             =head1 CONSTRUCTOR
207              
208             The following table explains the parameters for the constructor. However you can
209             also pass any valid parameters for C.
210              
211             +---------------+----------+---------------------------------------------------+
212             | Name | Required | Description |
213             +---------------+----------+---------------------------------------------------+
214             | input_file | Yes | Path to the source csv file. |
215             | output_file | No | Path to the output csv file. |
216             | col_key_idx | Yes | Column index that uniquely identify each row. |
217             | col_name_idx | Yes | Column index that would provide new column name. |
218             | col_value_idx | Yes | Column index that would provide new column value. |
219             | col_skip_idx | No | Column index to ignore in the output csv. |
220             +---------------+----------+---------------------------------------------------+
221              
222             Please note C is optional. If missing, it would create one for you. For
223             example if you provide C as B<"sample.csv"> then it would create output
224             file as B<"sample.pivot.csv">.
225              
226             Column index starts with 0, left to right. So in the example below, the C
227             would be 0. Similarly C and C would be 1 and 2 resp. In
228             case, we would want to skip the column C<"Year"> in the output file, then C
229             would be [3]. All index related parameters except C would expect number
230             0 or more. The C would expected an C of column index.
231              
232             +----------------+-----------+--------+--------+
233             | Student | Subject | Result | Year |
234             +----------------+-----------+--------+--------+
235             | Smith, John | Music | 7.0 | Year 1 |
236             | Smith, John | Maths | 4.0 | Year 1 |
237             | Smith, John | History | 9.0 | Year 1 |
238             | Smith, John | Language | 7.0 | Year 1 |
239             | Smith, John | Geography | 9.0 | Year 1 |
240             | Gabriel, Peter | Music | 2.0 | Year 1 |
241             | Gabriel, Peter | Maths | 10.0 | Year 1 |
242             | Gabriel, Peter | History | 7.0 | Year 1 |
243             | Gabriel, Peter | Language | 4.0 | Year 1 |
244             | Gabriel, Peter | Geography | 10.0 | Year 1 |
245             +----------------+-----------+--------+--------+
246              
247             Let's assume, we want column C<"Student"> to be our key column, the C<"Subject">
248             column to provide us the new column name and C<"Result"> column for the values.
249             Also C<"Year"> column to be skipped.Then the call would look like something below:
250              
251             use strict; use warnings;
252             use Text::CSV::Pivot;
253              
254             Text::CSV::Pivot->new({ input_file => 'sample.csv',
255             output_file => 'output.csv',
256             col_key_idx => 0,
257             col_name_idx => 1,
258             col_value_idx => 2,
259             col_skip_idx => [3] })->transform;
260              
261             =head1 METHODS
262              
263             =head2 transform()
264              
265             Tranform the source csv into the corresponding pivot csv based on the data passed
266             to the constructor.
267              
268             =cut
269              
270             sub transform {
271 4     4 1 7 my ($self) = @_;
272              
273 4         6 my $csv = $self->{'_csv_handler'};
274 4         5 my $out = $self->{'output_file'};
275 4         4 my $raw_data = $self->{'_raw_data'};
276 4         4 my $col_key_idx = $self->{'col_key_idx'};
277 4         5 my $col_name_idx = $self->{'col_name_idx'};
278 4         4 my $col_value_idx = $self->{'col_value_idx'};
279 4         6 my $col_skip_idx = $self->{'col_skip_idx'};
280 4         4 my $old_columns = $self->{'_old_columns'};
281 4         4 my $new_columns = $self->{'_new_columns'};
282              
283 4         4 my $new_column_headers = [];
284 4         10 foreach my $index (0 .. $#$old_columns) {
285 16 100 100     51 next if (($index == $col_name_idx)
      100        
286             || ($index == $col_value_idx)
287             || (grep($_ == $index, @$col_skip_idx) > 0));
288 7         12 push @$new_column_headers, $old_columns->[$index];
289             }
290 4         5 foreach my $new_column (@$new_columns) {
291 20         25 push @$new_column_headers, $new_column;
292             }
293              
294 4         16 $csv->eol("\r\n");
295 4         76 $csv->print($out, $new_column_headers);
296 4         38 foreach my $key (sort keys %$raw_data) {
297 8         32 my $row = [];
298 8         13 $row->[$col_key_idx] = $key;
299 8         13 foreach my $index (0 .. $#$old_columns) {
300 32 100 100     93 next if (($index == $col_key_idx)
      100        
      100        
301             || ($index == $col_name_idx)
302             || ($index == $col_value_idx)
303             || (grep($_ == $index, @$col_skip_idx) > 0));
304              
305 6 100 66     18 if (($index > $col_name_idx) || ($index > $col_value_idx)) {
306 2 50       5 if ($col_name_idx > $col_value_idx) {
307 0         0 $row->[$index-$col_name_idx] = $raw_data->{$key}->{$old_columns->[$index]};
308             }
309             else {
310 2         6 $row->[$index-$col_value_idx] = $raw_data->{$key}->{$old_columns->[$index]};
311             }
312             }
313             else {
314 4         8 $row->[$index] = $raw_data->{$key}->{$old_columns->[$index]};
315             }
316             }
317              
318 8         14 foreach my $column (@$new_columns) {
319 40   100     70 push @$row, $raw_data->{$key}->{$column} || '';
320             }
321              
322 8         57 $csv->print($out, $row);
323             }
324              
325 4         28 $csv->eof;
326 4         319 close($out);
327             }
328              
329             #
330             #
331             # PRIVATE METHODS
332              
333             sub _process_data {
334 4     4   8 my ($self) = @_;
335              
336 4         10 my $csv = $self->{'_csv_handler'};
337 4         8 my $in = $self->{'input_file'};
338 4         5 my $col_key_idx = $self->{'col_key_idx'};
339 4         4 my $col_name_idx = $self->{'col_name_idx'};
340 4         5 my $col_value_idx = $self->{'col_value_idx'};
341 4         4 my $columns = $self->{'_old_columns'};
342              
343 4         6 my $raw_data = {};
344 4         5 my $new_columns = {};
345              
346 4         78 while (my $values = $csv->getline($in)) {
347 38         695 my $key = $values->[$col_key_idx];
348 38         62 foreach my $index (0 .. $#$columns) {
349 152 100 100     354 next if (($index == $col_key_idx) || ($index == $col_name_idx) || ($index == $col_value_idx));
      100        
350 38         62 $raw_data->{$key}->{$columns->[$index]} = $values->[$index];
351             }
352              
353 38         66 $raw_data->{$key}->{$values->[$col_name_idx]} = $values->[$col_value_idx];
354 38         560 $new_columns->{$values->[$col_name_idx]} = 1;
355             }
356              
357 4         124 $csv->eof;
358 4         48 close($in);
359              
360 4         28 $self->{'_new_columns'} = [ sort keys %$new_columns ];
361 4         12 $self->{'_raw_data'} = $raw_data;
362             }
363              
364             =head1 AUTHOR
365              
366             Mohammad S Anwar, C<< >>
367              
368             =head1 REPOSITORY
369              
370             L
371              
372             =head1 SEE ALSO
373              
374             =over 4
375              
376             =item L
377              
378             =back
379              
380             =head1 BUGS
381              
382             Please report any bugs / feature requests to C
383             or through the web interface at L.
384             I will be notified, and then you'll automatically be notified of progress on your
385             bug as I make changes.
386              
387             =head1 SUPPORT
388              
389             You can find documentation for this module with the perldoc command.
390              
391             perldoc Text::CSV::Pivot
392              
393             You can also look for information at:
394              
395             =over 4
396              
397             =item * RT: CPAN's request tracker
398              
399             L
400              
401             =item * AnnoCPAN: Annotated CPAN documentation
402              
403             L
404              
405             =item * CPAN Ratings
406              
407             L
408              
409             =item * Search CPAN
410              
411             L
412              
413             =back
414              
415             =head1 LICENSE AND COPYRIGHT
416              
417             Copyright (C) 2018 Mohammad S Anwar.
418              
419             This program is free software; you can redistribute it and / or modify it under
420             the terms of the the Artistic License (2.0). You may obtain a copy of the full
421             license at:
422              
423             L
424              
425             Any use, modification, and distribution of the Standard or Modified Versions is
426             governed by this Artistic License.By using, modifying or distributing the Package,
427             you accept this license. Do not use, modify, or distribute the Package, if you do
428             not accept this license.
429              
430             If your Modified Version has been derived from a Modified Version made by someone
431             other than you,you are nevertheless required to ensure that your Modified Version
432             complies with the requirements of this license.
433              
434             This license does not grant you the right to use any trademark, service mark,
435             tradename, or logo of the Copyright Holder.
436              
437             This license includes the non-exclusive, worldwide, free-of-charge patent license
438             to make, have made, use, offer to sell, sell, import and otherwise transfer the
439             Package with respect to any patent claims licensable by the Copyright Holder that
440             are necessarily infringed by the Package. If you institute patent litigation
441             (including a cross-claim or counterclaim) against any party alleging that the
442             Package constitutes direct or contributory patent infringement,then this Artistic
443             License to you shall terminate on the date that such litigation is filed.
444              
445             Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER AND
446             CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. THE IMPLIED
447             WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR
448             NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY YOUR LOCAL LAW. UNLESS
449             REQUIRED BY LAW, NO COPYRIGHT HOLDER OR CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT,
450             INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE
451             OF THE PACKAGE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
452              
453             =cut
454              
455             1; # End of Text::CSV::Pivot