File Coverage

blib/lib/Text/CSV/Pivot.pm
Criterion Covered Total %
statement 90 93 96.7
branch 20 28 71.4
condition 26 29 89.6
subroutine 7 7 100.0
pod 1 2 50.0
total 144 159 90.5


line stmt bran cond sub pod time code
1             package Text::CSV::Pivot;
2              
3             $Text::CSV::Pivot::VERSION = '0.05';
4             $Text::CSV::Pivot::AUTHORITY = 'cpan:MANWAR';
5              
6             =head1 NAME
7              
8             Text::CSV::Pivot - Transform CSV file into Pivot Table format.
9              
10             =head1 VERSION
11              
12             Version 0.05
13              
14             =cut
15              
16 2     2   131905 use 5.008;
  2         18  
17 2     2   683 use Text::CSV;
  2         17494  
  2         62  
18 2     2   13 use File::Basename;
  2         4  
  2         1837  
19              
20             sub new {
21 4     4 0 9110 my ($class, @args) = @_;
22              
23 4 50 33     28 die "ERROR: Parameters have to be hashref."
24             if (@args == 1 && ((ref $args[0]) ne 'HASH'));
25              
26 4         9 my @required_keys = qw(input_file col_key_idx col_name_idx col_value_idx);
27 4         8 my @optional_keys = qw(output_file col_skip_idx);
28              
29 4         5 my $self = {};
30 4         11 foreach my $key (@required_keys) {
31 16         23 my $val = $args[0]->{$key};
32 16 50       27 die "ERROR: Missing required key '$key'." unless (defined $val);
33 16         61 $self->{$key} = delete $args[0]->{$key};
34             }
35 4         12 foreach my $key (@optional_keys) {
36 8         13 my $val = $args[0]->{$key};
37 8 100       14 if (defined $val) {
38 5         9 $self->{$key} = delete $args[0]->{$key};
39             }
40             }
41              
42 4 100       11 $self->{col_skip_idx} = [] unless (exists $self->{col_skip_idx});
43 4 50       10 $args[0]->{binary} = 1 unless (exists $args[0]->{binary});
44              
45 4 50       20 $self->{'_csv_handler'} = Text::CSV->new($args[0])
46             or die "ERROR: Can't use CSV: ".Text::CSV->error_diag;
47              
48 4         427 my $input_file = $self->{'input_file'};
49 4 50   1   116 open(my $in, '<:encoding(utf8)', $input_file)
  1         5  
  1         2  
  1         4  
50             or die "ERROR: Can't open input file $input_file [$!]\n";
51 4         14839 $self->{'input_file'} = $in;
52              
53 4         7 my $output_file = $self->{'output_file'};
54 4 50       26 if (!defined $output_file) {
55 0         0 $output_file = fileparse($input_file, '\.[^\.]*');
56 0         0 $output_file = sprintf("%s.pivot.csv", $output_file);
57             }
58              
59 4 50       159 open(my $out, '>:encoding(utf8)', $output_file)
60             or die "ERROR: Can't open output file $output_file [$!]\n";
61 4         218 $self->{'output_file'} = $out;
62              
63 4         143 $self->{'_old_columns'} = $self->{'_csv_handler'}->getline($in);
64              
65 4         216 bless $self, $class;
66              
67 4         10 $self->_process_data;
68              
69 4         20 return $self;
70             }
71              
72              
73             =head1 DESCRIPTION
74              
75             Recently I was asked to prepare pivot table using csv file at work. Having done
76             that using quick and dirty perl script, I decided to clean up and make it generic
77             so that others can also benefit.
78              
79             Below is sample data, I used for prototype as source csv file.
80              
81             +----------------+-----------+--------+--------+
82             | Student | Subject | Result | Year |
83             +----------------+-----------+--------+--------+
84             | Smith, John | Music | 7.0 | Year 1 |
85             | Smith, John | Maths | 4.0 | Year 1 |
86             | Smith, John | History | 9.0 | Year 1 |
87             | Smith, John | Language | 7.0 | Year 1 |
88             | Smith, John | Geography | 9.0 | Year 1 |
89             | Gabriel, Peter | Music | 2.0 | Year 1 |
90             | Gabriel, Peter | Maths | 10.0 | Year 1 |
91             | Gabriel, Peter | History | 7.0 | Year 1 |
92             | Gabriel, Peter | Language | 4.0 | Year 1 |
93             | Gabriel, Peter | Geography | 10.0 | Year 1 |
94             +----------------+-----------+--------+--------+
95              
96             I aim to get something like this below.
97              
98             +----------------+--------+-----------+---------+----------+-------+-------+
99             | Student | Year | Geography | History | Language | Maths | Music |
100             +----------------+--------+-----------+---------+----------+-------+-------+
101             | Gabriel, Peter | Year 1 | 10.0 | 7.0 | 4.0 | 10.0 | 2.0 |
102             | Smith, John | Year 1 | 9.0 | 9.0 | 7.0 | 4.0 | 7.0 |
103             +----------------+--------+-----------+---------+----------+-------+-------+
104              
105             With the help of L, I came up with the following solution.
106              
107             use strict; use warnings;
108             use Text::CSV::Pivot;
109              
110             Text::CSV::Pivot->new({ input_file => 'sample.csv',
111             col_key_idx => 0,
112             col_name_idx => 1,
113             col_value_idx => 2 })->transform;
114              
115             After executing the above code, I got the expected result in C.
116              
117             =head1 SYNOPSIS
118              
119             Let's assume we have the following source csv file (sample.csv):
120              
121             +----------------+-----------+-----------------+
122             | Student | Subject | Result | Year |
123             +----------------+-----------+--------+--------+
124             | Smith, John | Music | 7.0 | Year 1 |
125             | Smith, John | Maths | 4.0 | Year 1 |
126             | Smith, John | History | 9.0 | Year 1 |
127             | Smith, John | Geography | 9.0 | Year 1 |
128             | Gabriel, Peter | Music | 2.0 | Year 1 |
129             | Gabriel, Peter | Maths | 10.0 | Year 1 |
130             | Gabriel, Peter | History | 7.0 | Year 1 |
131             | Gabriel, Peter | Language | 4.0 | Year 1 |
132             +----------------+-----------+--------+--------+
133              
134             If you notice, the student C<"Smith, John"> do not have any score for the subject
135             C<"Language"> and the student C<"Gabriel, Peter"> missing score for C<"Geography">.
136              
137             use strict; use warnings;
138             use Text::CSV::Pivot;
139              
140             Text::CSV::Pivot->new({ input_file => 'sample.csv',
141             col_key_idx => 0,
142             col_name_idx => 1,
143             col_value_idx => 2 })->transform;
144              
145             The above code would then create the result in C as below:
146              
147             +----------------+--------+-----------+---------+----------+-------+-------+
148             | Student | Year | Geography | History | Language | Maths | Music |
149             +----------------+--------+-----------+---------+----------+-------+-------+
150             | Gabriel, Peter | Year 1 | 10.0 | 7.0 | | 10.0 | 2.0 |
151             | Smith, John | Year 1 | | 9.0 | 7.0 | 4.0 | 7.0 |
152             +----------------+--------+-----------+---------+----------+-------+-------+
153              
154             In case, we would want to skip "Year" column then the following code:
155              
156             use strict; use warnings;
157             use Text::CSV::Pivot;
158              
159             Text::CSV::Pivot->new({ input_file => 'sample.csv',
160             col_key_idx => 0,
161             col_name_idx => 1,
162             col_value_idx => 2,
163             col_skip_idx => [3] })->transform;
164              
165             You should get the result as below:
166              
167             +----------------+-----------+---------+----------+-------+-------+
168             | Student | Geography | History | Language | Maths | Music |
169             +----------------+-----------+---------+----------+-------+-------+
170             | Gabriel, Peter | 10.0 | 7.0 | | 10.0 | 2.0 |
171             | Smith, John | | 9.0 | 7.0 | 4.0 | 7.0 |
172             +----------------+-----------+---------+----------+-------+-------+
173              
174             =head1 CONSTRUCTOR
175              
176             The following table explains the parameters for the constructor. However you can
177             also pass any valid parameters for C.
178              
179             +---------------+----------+---------------------------------------------------+
180             | Name | Required | Description |
181             +---------------+----------+---------------------------------------------------+
182             | input_file | Yes | Path to the source csv file. |
183             | output_file | No | Path to the output csv file. |
184             | col_key_idx | Yes | Column index that uniquely identify each row. |
185             | col_name_idx | Yes | Column index that would provide new column name. |
186             | col_value_idx | Yes | Column index that would provide new column value. |
187             | col_skip_idx | No | Column index to ignore in the output csv. |
188             +---------------+----------+---------------------------------------------------+
189              
190             Please note C is optional. If missing, it would create one for you. For
191             example if you provide C as B<"sample.csv"> then it would create output
192             file as B<"sample.pivot.csv">.
193              
194             Column index starts with 0, left to right. So in the example below, the C
195             would be 0. Similarly C and C would be 1 and 2 resp. In
196             case, we would want to skip the column "Year" in the output file, then C
197             would be [3]. All index related parameters except C would expect number
198             0 or more. The C would expected an C of column index.
199              
200             +----------------+-----------+--------+--------+
201             | Student | Subject | Result | Year |
202             +----------------+-----------+--------+--------+
203             | Smith, John | Music | 7.0 | Year 1 |
204             | Smith, John | Maths | 4.0 | Year 1 |
205             | Smith, John | History | 9.0 | Year 1 |
206             | Smith, John | Language | 7.0 | Year 1 |
207             | Smith, John | Geography | 9.0 | Year 1 |
208             | Gabriel, Peter | Music | 2.0 | Year 1 |
209             | Gabriel, Peter | Maths | 10.0 | Year 1 |
210             | Gabriel, Peter | History | 7.0 | Year 1 |
211             | Gabriel, Peter | Language | 4.0 | Year 1 |
212             | Gabriel, Peter | Geography | 10.0 | Year 1 |
213             +----------------+-----------+--------+--------+
214              
215             Let's assume, we want column C<"Student"> to be our key column, the C<"Subject">
216             column to provide us the new column name and C<"Result"> column for the values.
217             Also C<"Year"> column to be skipped.Then the call would look like something below:
218              
219             use strict; use warnings;
220             use Text::CSV::Pivot;
221              
222             Text::CSV::Pivot->new({ input_file => 'sample.csv',
223             output_file => 'output.csv',
224             col_key_idx => 0,
225             col_name_idx => 1,
226             col_value_idx => 2,
227             col_skip_idx => [3] })->transform;
228              
229             =head1 METHODS
230              
231             =head2 transform()
232              
233             Tranform the source csv into the corresponding pivot csv based on the data passed
234             to the constructor.
235              
236             =cut
237              
238             sub transform {
239 4     4 1 9 my ($self) = @_;
240              
241 4         7 my $csv = $self->{'_csv_handler'};
242 4         7 my $out = $self->{'output_file'};
243 4         5 my $raw_data = $self->{'_raw_data'};
244 4         15 my $col_key_idx = $self->{'col_key_idx'};
245 4         6 my $col_name_idx = $self->{'col_name_idx'};
246 4         4 my $col_value_idx = $self->{'col_value_idx'};
247 4         6 my $col_skip_idx = $self->{'col_skip_idx'};
248 4         6 my $old_columns = $self->{'_old_columns'};
249 4         5 my $new_columns = $self->{'_new_columns'};
250              
251 4         7 my $new_column_headers = [];
252 4         10 foreach my $index (0 .. $#$old_columns) {
253 16 100 100     58 next if (($index == $col_name_idx)
      100        
254             || ($index == $col_value_idx)
255             || (grep($_ == $index, @$col_skip_idx) > 0));
256 7         18 push @$new_column_headers, $old_columns->[$index];
257             }
258 4         7 foreach my $new_column (@$new_columns) {
259 20         27 push @$new_column_headers, $new_column;
260             }
261              
262 4         16 $csv->eol("\r\n");
263 4         80 $csv->print($out, $new_column_headers);
264 4         44 foreach my $key (sort keys %$raw_data) {
265 8         34 my $row = [];
266 8         12 $row->[$col_key_idx] = $key;
267 8         16 foreach my $index (0 .. $#$old_columns) {
268 32 100 100     101 next if (($index == $col_key_idx)
      100        
      100        
269             || ($index == $col_name_idx)
270             || ($index == $col_value_idx)
271             || (grep($_ == $index, @$col_skip_idx) > 0));
272              
273 6 100 66     18 if (($index > $col_name_idx) || ($index > $col_value_idx)) {
274 2 50       4 if ($col_name_idx > $col_value_idx) {
275 0         0 $row->[$index-$col_name_idx] = $raw_data->{$key}->{$old_columns->[$index]};
276             }
277             else {
278 2         6 $row->[$index-$col_value_idx] = $raw_data->{$key}->{$old_columns->[$index]};
279             }
280             }
281             else {
282 4         9 $row->[$index] = $raw_data->{$key}->{$old_columns->[$index]};
283             }
284             }
285              
286 8         15 foreach my $column (@$new_columns) {
287 40   100     74 push @$row, $raw_data->{$key}->{$column} || '';
288             }
289              
290 8         31 $csv->print($out, $row);
291             }
292              
293 4         32 $csv->eof;
294 4         307 close($out);
295             }
296              
297             #
298             #
299             # PRIVATE METHODS
300              
301             sub _process_data {
302 4     4   8 my ($self) = @_;
303              
304 4         9 my $csv = $self->{'_csv_handler'};
305 4         5 my $in = $self->{'input_file'};
306 4         6 my $col_key_idx = $self->{'col_key_idx'};
307 4         5 my $col_name_idx = $self->{'col_name_idx'};
308 4         5 my $col_value_idx = $self->{'col_value_idx'};
309 4         5 my $columns = $self->{'_old_columns'};
310              
311 4         7 my $raw_data = {};
312 4         14 my $new_columns = {};
313              
314 4         82 while (my $values = $csv->getline($in)) {
315 38         847 my $key = $values->[$col_key_idx];
316 38         109 foreach my $index (0 .. $#$columns) {
317 152 100 100     393 next if (($index == $col_key_idx) || ($index == $col_name_idx) || ($index == $col_value_idx));
      100        
318 38         83 $raw_data->{$key}->{$columns->[$index]} = $values->[$index];
319             }
320              
321 38         79 $raw_data->{$key}->{$values->[$col_name_idx]} = $values->[$col_value_idx];
322 38         615 $new_columns->{$values->[$col_name_idx]} = 1;
323             }
324              
325 4         137 $csv->eof;
326 4         60 close($in);
327              
328 4         29 $self->{'_new_columns'} = [ sort keys %$new_columns ];
329 4         15 $self->{'_raw_data'} = $raw_data;
330             }
331              
332             =head1 AUTHOR
333              
334             Mohammad S Anwar, C<< >>
335              
336             =head1 REPOSITORY
337              
338             L
339              
340             =head1 SEE ALSO
341              
342             =over 4
343              
344             =item L
345              
346             =back
347              
348             =head1 BUGS
349              
350             Please report any bugs / feature requests to C
351             or through the web interface at L.
352             I will be notified, and then you'll automatically be notified of progress on your
353             bug as I make changes.
354              
355             =head1 SUPPORT
356              
357             You can find documentation for this module with the perldoc command.
358              
359             perldoc Text::CSV::Pivot
360              
361             You can also look for information at:
362              
363             =over 4
364              
365             =item * RT: CPAN's request tracker
366              
367             L
368              
369             =item * AnnoCPAN: Annotated CPAN documentation
370              
371             L
372              
373             =item * CPAN Ratings
374              
375             L
376              
377             =item * Search CPAN
378              
379             L
380              
381             =back
382              
383             =head1 LICENSE AND COPYRIGHT
384              
385             Copyright (C) 2018 Mohammad S Anwar.
386              
387             This program is free software; you can redistribute it and / or modify it under
388             the terms of the the Artistic License (2.0). You may obtain a copy of the full
389             license at:
390              
391             L
392              
393             Any use, modification, and distribution of the Standard or Modified Versions is
394             governed by this Artistic License.By using, modifying or distributing the Package,
395             you accept this license. Do not use, modify, or distribute the Package, if you do
396             not accept this license.
397              
398             If your Modified Version has been derived from a Modified Version made by someone
399             other than you,you are nevertheless required to ensure that your Modified Version
400             complies with the requirements of this license.
401              
402             This license does not grant you the right to use any trademark, service mark,
403             tradename, or logo of the Copyright Holder.
404              
405             This license includes the non-exclusive, worldwide, free-of-charge patent license
406             to make, have made, use, offer to sell, sell, import and otherwise transfer the
407             Package with respect to any patent claims licensable by the Copyright Holder that
408             are necessarily infringed by the Package. If you institute patent litigation
409             (including a cross-claim or counterclaim) against any party alleging that the
410             Package constitutes direct or contributory patent infringement,then this Artistic
411             License to you shall terminate on the date that such litigation is filed.
412              
413             Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER AND
414             CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. THE IMPLIED
415             WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR
416             NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY YOUR LOCAL LAW. UNLESS
417             REQUIRED BY LAW, NO COPYRIGHT HOLDER OR CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT,
418             INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE
419             OF THE PACKAGE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
420              
421             =cut
422              
423             1; # End of Text::CSV::Pivot