File Coverage

blib/lib/Text/CSV/Pivot.pm

Criterion	Covered	Total	%
statement	90	93	96.7
branch	20	28	71.4
condition	26	29	89.6
subroutine	7	7	100.0
pod	1	2	50.0
total	144	159	90.5

line	stmt	bran	cond	sub	pod	time	code
1							package Text::CSV::Pivot;
2
3							$Text::CSV::Pivot::VERSION = '0.05';
4							$Text::CSV::Pivot::AUTHORITY = 'cpan:MANWAR';
5
6							=head1 NAME
7
8							Text::CSV::Pivot - Transform CSV file into Pivot Table format.
9
10							=head1 VERSION
11
12							Version 0.05
13
14							=cut
15
16	2			2		131905	use 5.008;
	2					18
17	2			2		683	use Text::CSV;
	2					17494
	2					62
18	2			2		13	use File::Basename;
	2					4
	2					1837
19
20							sub new {
21	4			4	0	9110	my ($class, @args) = @_;
22
23	4	50	33			28	die "ERROR: Parameters have to be hashref."
24							if (@args == 1 && ((ref $args[0]) ne 'HASH'));
25
26	4					9	my @required_keys = qw(input_file col_key_idx col_name_idx col_value_idx);
27	4					8	my @optional_keys = qw(output_file col_skip_idx);
28
29	4					5	my $self = {};
30	4					11	foreach my $key (@required_keys) {
31	16					23	my $val = $args[0]->{$key};
32	16	50				27	die "ERROR: Missing required key '$key'." unless (defined $val);
33	16					61	$self->{$key} = delete $args[0]->{$key};
34							}
35	4					12	foreach my $key (@optional_keys) {
36	8					13	my $val = $args[0]->{$key};
37	8	100				14	if (defined $val) {
38	5					9	$self->{$key} = delete $args[0]->{$key};
39							}
40							}
41
42	4	100				11	$self->{col_skip_idx} = [] unless (exists $self->{col_skip_idx});
43	4	50				10	$args[0]->{binary} = 1 unless (exists $args[0]->{binary});
44
45	4	50				20	$self->{'_csv_handler'} = Text::CSV->new($args[0])
46							or die "ERROR: Can't use CSV: ".Text::CSV->error_diag;
47
48	4					427	my $input_file = $self->{'input_file'};
49	4	50		1		116	open(my $in, '<:encoding(utf8)', $input_file)
	1					5
	1					2
	1					4
50							or die "ERROR: Can't open input file $input_file [$!]\n";
51	4					14839	$self->{'input_file'} = $in;
52
53	4					7	my $output_file = $self->{'output_file'};
54	4	50				26	if (!defined $output_file) {
55	0					0	$output_file = fileparse($input_file, '\.[^\.]*');
56	0					0	$output_file = sprintf("%s.pivot.csv", $output_file);
57							}
58
59	4	50				159	open(my $out, '>:encoding(utf8)', $output_file)
60							or die "ERROR: Can't open output file $output_file [$!]\n";
61	4					218	$self->{'output_file'} = $out;
62
63	4					143	$self->{'_old_columns'} = $self->{'_csv_handler'}->getline($in);
64
65	4					216	bless $self, $class;
66
67	4					10	$self->_process_data;
68
69	4					20	return $self;
70							}
71
72
73							=head1 DESCRIPTION
74
75							Recently I was asked to prepare pivot table using csv file at work. Having done
76							that using quick and dirty perl script, I decided to clean up and make it generic
77							so that others can also benefit.
78
79							Below is sample data, I used for prototype as source csv file.
80
81							+----------------+-----------+--------+--------+
82							\| Student \| Subject \| Result \| Year \|
83							+----------------+-----------+--------+--------+
84							\| Smith, John \| Music \| 7.0 \| Year 1 \|
85							\| Smith, John \| Maths \| 4.0 \| Year 1 \|
86							\| Smith, John \| History \| 9.0 \| Year 1 \|
87							\| Smith, John \| Language \| 7.0 \| Year 1 \|
88							\| Smith, John \| Geography \| 9.0 \| Year 1 \|
89							\| Gabriel, Peter \| Music \| 2.0 \| Year 1 \|
90							\| Gabriel, Peter \| Maths \| 10.0 \| Year 1 \|
91							\| Gabriel, Peter \| History \| 7.0 \| Year 1 \|
92							\| Gabriel, Peter \| Language \| 4.0 \| Year 1 \|
93							\| Gabriel, Peter \| Geography \| 10.0 \| Year 1 \|
94							+----------------+-----------+--------+--------+
95
96							I aim to get something like this below.
97
98							+----------------+--------+-----------+---------+----------+-------+-------+
99							\| Student \| Year \| Geography \| History \| Language \| Maths \| Music \|
100							+----------------+--------+-----------+---------+----------+-------+-------+
101							\| Gabriel, Peter \| Year 1 \| 10.0 \| 7.0 \| 4.0 \| 10.0 \| 2.0 \|
102							\| Smith, John \| Year 1 \| 9.0 \| 9.0 \| 7.0 \| 4.0 \| 7.0 \|
103							+----------------+--------+-----------+---------+----------+-------+-------+
104
105							With the help of L, I came up with the following solution.
106
107							use strict; use warnings;
108							use Text::CSV::Pivot;
109
110							Text::CSV::Pivot->new({ input_file => 'sample.csv',
111							col_key_idx => 0,
112							col_name_idx => 1,
113							col_value_idx => 2 })->transform;
114
115							After executing the above code, I got the expected result in C.
116
117							=head1 SYNOPSIS
118
119							Let's assume we have the following source csv file (sample.csv):
120
121							+----------------+-----------+-----------------+
122							\| Student \| Subject \| Result \| Year \|
123							+----------------+-----------+--------+--------+
124							\| Smith, John \| Music \| 7.0 \| Year 1 \|
125							\| Smith, John \| Maths \| 4.0 \| Year 1 \|
126							\| Smith, John \| History \| 9.0 \| Year 1 \|
127							\| Smith, John \| Geography \| 9.0 \| Year 1 \|
128							\| Gabriel, Peter \| Music \| 2.0 \| Year 1 \|
129							\| Gabriel, Peter \| Maths \| 10.0 \| Year 1 \|
130							\| Gabriel, Peter \| History \| 7.0 \| Year 1 \|
131							\| Gabriel, Peter \| Language \| 4.0 \| Year 1 \|
132							+----------------+-----------+--------+--------+
133
134							If you notice, the student C<"Smith, John"> do not have any score for the subject
135							C<"Language"> and the student C<"Gabriel, Peter"> missing score for C<"Geography">.
136
137							use strict; use warnings;
138							use Text::CSV::Pivot;
139
140							Text::CSV::Pivot->new({ input_file => 'sample.csv',
141							col_key_idx => 0,
142							col_name_idx => 1,
143							col_value_idx => 2 })->transform;
144
145							The above code would then create the result in C as below:
146
147							+----------------+--------+-----------+---------+----------+-------+-------+
148							\| Student \| Year \| Geography \| History \| Language \| Maths \| Music \|
149							+----------------+--------+-----------+---------+----------+-------+-------+
150							\| Gabriel, Peter \| Year 1 \| 10.0 \| 7.0 \| \| 10.0 \| 2.0 \|
151							\| Smith, John \| Year 1 \| \| 9.0 \| 7.0 \| 4.0 \| 7.0 \|
152							+----------------+--------+-----------+---------+----------+-------+-------+
153
154							In case, we would want to skip "Year" column then the following code:
155
156							use strict; use warnings;
157							use Text::CSV::Pivot;
158
159							Text::CSV::Pivot->new({ input_file => 'sample.csv',
160							col_key_idx => 0,
161							col_name_idx => 1,
162							col_value_idx => 2,
163							col_skip_idx => [3] })->transform;
164
165							You should get the result as below:
166
167							+----------------+-----------+---------+----------+-------+-------+
168							\| Student \| Geography \| History \| Language \| Maths \| Music \|
169							+----------------+-----------+---------+----------+-------+-------+
170							\| Gabriel, Peter \| 10.0 \| 7.0 \| \| 10.0 \| 2.0 \|
171							\| Smith, John \| \| 9.0 \| 7.0 \| 4.0 \| 7.0 \|
172							+----------------+-----------+---------+----------+-------+-------+
173
174							=head1 CONSTRUCTOR
175
176							The following table explains the parameters for the constructor. However you can
177							also pass any valid parameters for C.
178
179							+---------------+----------+---------------------------------------------------+
180							\| Name \| Required \| Description \|
181							+---------------+----------+---------------------------------------------------+
182							\| input_file \| Yes \| Path to the source csv file. \|
183							\| output_file \| No \| Path to the output csv file. \|
184							\| col_key_idx \| Yes \| Column index that uniquely identify each row. \|
185							\| col_name_idx \| Yes \| Column index that would provide new column name. \|
186							\| col_value_idx \| Yes \| Column index that would provide new column value. \|
187							\| col_skip_idx \| No \| Column index to ignore in the output csv. \|
188							+---------------+----------+---------------------------------------------------+
189
190							Please note C is optional. If missing, it would create one for you. For
191							example if you provide C as B<"sample.csv"> then it would create output
192							file as B<"sample.pivot.csv">.
193
194							Column index starts with 0, left to right. So in the example below, the C
195							would be 0. Similarly C and C would be 1 and 2 resp. In
196							case, we would want to skip the column "Year" in the output file, then C
197							would be [3]. All index related parameters except C would expect number
198							0 or more. The C would expected an C of column index.
199
200							+----------------+-----------+--------+--------+
201							\| Student \| Subject \| Result \| Year \|
202							+----------------+-----------+--------+--------+
203							\| Smith, John \| Music \| 7.0 \| Year 1 \|
204							\| Smith, John \| Maths \| 4.0 \| Year 1 \|
205							\| Smith, John \| History \| 9.0 \| Year 1 \|
206							\| Smith, John \| Language \| 7.0 \| Year 1 \|
207							\| Smith, John \| Geography \| 9.0 \| Year 1 \|
208							\| Gabriel, Peter \| Music \| 2.0 \| Year 1 \|
209							\| Gabriel, Peter \| Maths \| 10.0 \| Year 1 \|
210							\| Gabriel, Peter \| History \| 7.0 \| Year 1 \|
211							\| Gabriel, Peter \| Language \| 4.0 \| Year 1 \|
212							\| Gabriel, Peter \| Geography \| 10.0 \| Year 1 \|
213							+----------------+-----------+--------+--------+
214
215							Let's assume, we want column C<"Student"> to be our key column, the C<"Subject">
216							column to provide us the new column name and C<"Result"> column for the values.
217							Also C<"Year"> column to be skipped.Then the call would look like something below:
218
219							use strict; use warnings;
220							use Text::CSV::Pivot;
221
222							Text::CSV::Pivot->new({ input_file => 'sample.csv',
223							output_file => 'output.csv',
224							col_key_idx => 0,
225							col_name_idx => 1,
226							col_value_idx => 2,
227							col_skip_idx => [3] })->transform;
228
229							=head1 METHODS
230
231							=head2 transform()
232
233							Tranform the source csv into the corresponding pivot csv based on the data passed
234							to the constructor.
235
236							=cut
237
238							sub transform {
239	4			4	1	9	my ($self) = @_;
240
241	4					7	my $csv = $self->{'_csv_handler'};
242	4					7	my $out = $self->{'output_file'};
243	4					5	my $raw_data = $self->{'_raw_data'};
244	4					15	my $col_key_idx = $self->{'col_key_idx'};
245	4					6	my $col_name_idx = $self->{'col_name_idx'};
246	4					4	my $col_value_idx = $self->{'col_value_idx'};
247	4					6	my $col_skip_idx = $self->{'col_skip_idx'};
248	4					6	my $old_columns = $self->{'_old_columns'};
249	4					5	my $new_columns = $self->{'_new_columns'};
250
251	4					7	my $new_column_headers = [];
252	4					10	foreach my $index (0 .. $#$old_columns) {
253	16	100	100			58	next if (($index == $col_name_idx)
			100
254							\|\| ($index == $col_value_idx)
255							\|\| (grep($_ == $index, @$col_skip_idx) > 0));
256	7					18	push @$new_column_headers, $old_columns->[$index];
257							}
258	4					7	foreach my $new_column (@$new_columns) {
259	20					27	push @$new_column_headers, $new_column;
260							}
261
262	4					16	$csv->eol("\r\n");
263	4					80	$csv->print($out, $new_column_headers);
264	4					44	foreach my $key (sort keys %$raw_data) {
265	8					34	my $row = [];
266	8					12	$row->[$col_key_idx] = $key;
267	8					16	foreach my $index (0 .. $#$old_columns) {
268	32	100	100			101	next if (($index == $col_key_idx)
			100
			100
269							\|\| ($index == $col_name_idx)
270							\|\| ($index == $col_value_idx)
271							\|\| (grep($_ == $index, @$col_skip_idx) > 0));
272
273	6	100	66			18	if (($index > $col_name_idx) \|\| ($index > $col_value_idx)) {
274	2	50				4	if ($col_name_idx > $col_value_idx) {
275	0					0	$row->[$index-$col_name_idx] = $raw_data->{$key}->{$old_columns->[$index]};
276							}
277							else {
278	2					6	$row->[$index-$col_value_idx] = $raw_data->{$key}->{$old_columns->[$index]};
279							}
280							}
281							else {
282	4					9	$row->[$index] = $raw_data->{$key}->{$old_columns->[$index]};
283							}
284							}
285
286	8					15	foreach my $column (@$new_columns) {
287	40		100			74	push @$row, $raw_data->{$key}->{$column} \|\| '';
288							}
289
290	8					31	$csv->print($out, $row);
291							}
292
293	4					32	$csv->eof;
294	4					307	close($out);
295							}
296
297							#
298							#
299							# PRIVATE METHODS
300
301							sub _process_data {
302	4			4		8	my ($self) = @_;
303
304	4					9	my $csv = $self->{'_csv_handler'};
305	4					5	my $in = $self->{'input_file'};
306	4					6	my $col_key_idx = $self->{'col_key_idx'};
307	4					5	my $col_name_idx = $self->{'col_name_idx'};
308	4					5	my $col_value_idx = $self->{'col_value_idx'};
309	4					5	my $columns = $self->{'_old_columns'};
310
311	4					7	my $raw_data = {};
312	4					14	my $new_columns = {};
313
314	4					82	while (my $values = $csv->getline($in)) {
315	38					847	my $key = $values->[$col_key_idx];
316	38					109	foreach my $index (0 .. $#$columns) {
317	152	100	100			393	next if (($index == $col_key_idx) \|\| ($index == $col_name_idx) \|\| ($index == $col_value_idx));
			100
318	38					83	$raw_data->{$key}->{$columns->[$index]} = $values->[$index];
319							}
320
321	38					79	$raw_data->{$key}->{$values->[$col_name_idx]} = $values->[$col_value_idx];
322	38					615	$new_columns->{$values->[$col_name_idx]} = 1;
323							}
324
325	4					137	$csv->eof;
326	4					60	close($in);
327
328	4					29	$self->{'_new_columns'} = [ sort keys %$new_columns ];
329	4					15	$self->{'_raw_data'} = $raw_data;
330							}
331
332							=head1 AUTHOR
333
334							Mohammad S Anwar, C<< >>
335
336							=head1 REPOSITORY
337
338							L
339
340							=head1 SEE ALSO
341
342							=over 4
343
344							=item L
345
346							=back
347
348							=head1 BUGS
349
350							Please report any bugs / feature requests to C
351							or through the web interface at L.
352							I will be notified, and then you'll automatically be notified of progress on your
353							bug as I make changes.
354
355							=head1 SUPPORT
356
357							You can find documentation for this module with the perldoc command.
358
359							perldoc Text::CSV::Pivot
360
361							You can also look for information at:
362
363							=over 4
364
365							=item * RT: CPAN's request tracker
366
367							L
368
369							=item * AnnoCPAN: Annotated CPAN documentation
370
371							L
372
373							=item * CPAN Ratings
374
375							L
376
377							=item * Search CPAN
378
379							L
380
381							=back
382
383							=head1 LICENSE AND COPYRIGHT
384
385							Copyright (C) 2018 Mohammad S Anwar.
386
387							This program is free software; you can redistribute it and / or modify it under
388							the terms of the the Artistic License (2.0). You may obtain a copy of the full
389							license at:
390
391							L
392
393							Any use, modification, and distribution of the Standard or Modified Versions is
394							governed by this Artistic License.By using, modifying or distributing the Package,
395							you accept this license. Do not use, modify, or distribute the Package, if you do
396							not accept this license.
397
398							If your Modified Version has been derived from a Modified Version made by someone
399							other than you,you are nevertheless required to ensure that your Modified Version
400							complies with the requirements of this license.
401
402							This license does not grant you the right to use any trademark, service mark,
403							tradename, or logo of the Copyright Holder.
404
405							This license includes the non-exclusive, worldwide, free-of-charge patent license
406							to make, have made, use, offer to sell, sell, import and otherwise transfer the
407							Package with respect to any patent claims licensable by the Copyright Holder that
408							are necessarily infringed by the Package. If you institute patent litigation
409							(including a cross-claim or counterclaim) against any party alleging that the
410							Package constitutes direct or contributory patent infringement,then this Artistic
411							License to you shall terminate on the date that such litigation is filed.
412
413							Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER AND
414							CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. THE IMPLIED
415							WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR
416							NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY YOUR LOCAL LAW. UNLESS
417							REQUIRED BY LAW, NO COPYRIGHT HOLDER OR CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT,
418							INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE
419							OF THE PACKAGE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
420
421							=cut
422
423							1; # End of Text::CSV::Pivot