File Coverage

blib/lib/Fsdb/Filter/dbcolsplittorows.pm

Criterion	Covered	Total	%
statement	18	61	29.5
branch	0	24	0.0
condition			n/a
subroutine	6	15	40.0
pod	5	5	100.0
total	29	105	27.6

line	stmt	bran	sub	pod	time	code
1						#!/usr/bin/perl
2
3						#
4						# dbcolsplittorows.pm
5						# Copyright (C) 1991-2008 by John Heidemann
6						# $Id: 56d2923e0064a9026e77232e84b7f8a1c48e1947 $
7						#
8						# This program is distributed under terms of the GNU general
9						# public license, version 2. See the file COPYING
10						# in $dblibdir for details.
11						#
12
13						package Fsdb::Filter::dbcolsplittorows;
14
15						=head1 NAME
16
17						dbcolsplittorows - split an existing column into multiple new rows
18
19						=head1 SYNOPSIS
20
21						dbcolsplittorows [-C ElementSeperator] [-e null] [-E] [-N enumerated-name] column [column...]
22
23						=head1 DESCRIPTION
24
25						Split column into pieces, outputting one row for each piece.
26
27						By default, any empty fields are ignored.
28						If an empty field value is given with -e, then they produce output.
29
30						When a null value is given, empty fields at the beginning and end of
31						lines are suppressed (like perl split). Unlike perl, if ALL fields
32						are empty, we generate one (and not zero) empty fields.
33
34						The inverse of this commend is L.
35
36
37						=head1 OPTIONS
38
39						=over 4
40
41						=item B<-C S> or B<--element-separator S>
42
43						Specify the separator used to split columns.
44						(Defaults to a single underscore.)
45
46						=item B<-E> or B<--enumerate>
47
48						Enumerate output columns: rather than assuming the column name uses
49						the element separator, we keep it whole and fill in with indexes
50						starting from 0.
51
52						=item B<-N> or B<--new-name> N
53
54						Name the new column N for enumeration.
55						Defaults to C.
56
57						=item B<-e E> or B<--empty E>
58
59						give value E as the value for empty (null) records
60
61						=back
62
63						=for comment
64						begin_standard_fsdb_options
65
66						This module also supports the standard fsdb options:
67
68						=over 4
69
70						=item B<-d>
71
72						Enable debugging output.
73
74						=item B<-i> or B<--input> InputSource
75
76						Read from InputSource, typically a file name, or C<-> for standard input,
77						or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects.
78
79						=item B<-o> or B<--output> OutputDestination
80
81						Write to OutputDestination, typically a file name, or C<-> for standard output,
82						or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects.
83
84						=item B<--autorun> or B<--noautorun>
85
86						By default, programs process automatically,
87						but Fsdb::Filter objects in Perl do not run until you invoke
88						the run() method.
89						The C<--(no)autorun> option controls that behavior within Perl.
90
91						=item B<--help>
92
93						Show help.
94
95						=item B<--man>
96
97						Show full manual.
98
99						=back
100
101						=for comment
102						end_standard_fsdb_options
103
104
105						=head1 SAMPLE USAGE
106
107						=head2 Input:
108
109						#fsdb name uid
110						John_Heidemann 2274
111						Greg_Johnson 2275
112						Root 0
113						# this is a simple database
114						# \| dbcol fullname uid
115						# \| dbcolrename fullname name
116
117						=head2 Command:
118
119						cat data.fsdb \| dbcolsplittorows name
120
121						=head2 Output:
122
123						#fsdb name uid
124						John 2274
125						Heidemann 2274
126						Greg 2275
127						Johnson 2275
128						Root 0
129						# this is a simple database
130						# \| dbcol fullname uid
131						# \| dbcolrename fullname name
132						# \| dbcolsplittorows name
133
134						=head1 SEE ALSO
135
136						L.
137						L.
138						L.
139						L.
140						L.
141
142
143						=head1 CLASS FUNCTIONS
144
145						=cut
146
147						@ISA = qw(Fsdb::Filter);
148						$VERSION = 2.0;
149
150	1		1		6711	use strict;
	1				4
	1				45
151	1		1		12	use Pod::Usage;
	1				3
	1				146
152	1		1		11	use Carp;
	1				3
	1				83
153
154	1		1		7	use Fsdb::Filter;
	1				1
	1				19
155	1		1		4	use Fsdb::IO::Reader;
	1				2
	1				17
156	1		1		4	use Fsdb::IO::Writer;
	1				2
	1				724
157
158
159						=head2 new
160
161						$filter = new Fsdb::Filter::dbcolsplittocols(@arguments);
162
163						Create a new dbcolsplittocols object, taking command-line arguments.
164
165						=cut
166
167						sub new ($@) {
168	0		0	1		my $class = shift @_;
169	0					my $self = $class->SUPER::new(@_);
170	0					bless $self, $class;
171	0					$self->set_defaults;
172	0					$self->parse_options(@_);
173	0					$self->SUPER::post_new();
174	0					return $self;
175						}
176
177
178						=head2 set_defaults
179
180						$filter->set_defaults();
181
182						Internal: set up defaults.
183
184						=cut
185
186						sub set_defaults ($) {
187	0		0	1		my($self) = @_;
188	0					$self->SUPER::set_defaults();
189	0					$self->{_elem_separator} = '_';
190	0					$self->{_enumerate} = undef;
191	0					$self->{_destination_column} = 'count';
192	0					$self->{_target_column} = undef;
193	0					$self->{_empty} = undef;
194						}
195
196						=head2 parse_options
197
198						$filter->parse_options(@ARGV);
199
200						Internal: parse command-line arguments.
201
202						=cut
203
204						sub parse_options ($@) {
205	0		0	1		my $self = shift @_;
206
207	0					my(@argv) = @_;
208						$self->get_options(
209						\@argv,
210	0		0			'help\|?' => sub { pod2usage(1); },
211	0		0			'man' => sub { pod2usage(-verbose => 2); },
212						'autorun!' => \$self->{_autorun},
213						'close!' => \$self->{_close},
214						'C\|element-separator=s' => \$self->{_elem_separator},
215						'd\|debug+' => \$self->{_debug},
216						'e\|empty=s' => \$self->{_empty},
217						'E\|enumerate!' => \$self->{_enumerate},
218	0		0			'i\|input=s' => sub { $self->parse_io_option('input', @_); },
219						'log!' => \$self->{_logprog},
220						'N\|new-name=s' => \$self->{_destination_column},
221	0		0			'o\|output=s' => sub { $self->parse_io_option('output', @_); },
222	0	0				) or pod2usage(2);
223	0					$self->parse_target_column(\@argv);
224						}
225
226						=head2 setup
227
228						$filter->setup();
229
230						Internal: setup, parse headers.
231
232						=cut
233
234						sub setup ($) {
235	0		0	1		my($self) = @_;
236
237	0					$self->finish_io_option('input', -comment_handler => $self->create_pass_comments_sub);
238
239	0	0				pod2usage(2) if (!defined($self->{_target_column}));
240
241	0					$self->{_target_coli} = $self->{_in}->col_to_i($self->{_target_column});
242						croak $self->{_prog} . ": target column " . $self->{_target_column} . " is not in input stream.\n"
243	0	0				if (!defined($self->{_target_coli}));
244
245						# Sanity check user's input to avoid injection attacks.
246						croak $self->{_prog} . ": bad element separator.\n"
247	0	0				if ($self->{_element_separator} =~ /\'/);
248						croak $self->{_prog} . ": bad empty value.\n"
249	0	0				if ($self->{_empty} =~ /\'/);
250
251	0					$self->finish_io_option('output', -clone => $self->{_in}, -outputheader => 'delay');
252	0	0				if ($self->{_enumerate}) {
253						$self->{_out}->col_create($self->{_destination_column})
254	0	0				or croak $self->{_prog} . ": cannot create column '" . $self->{_destination_column} . "' (maybe it already existed?)\n";
255						};
256						}
257
258						=head2 run
259
260						$filter->run();
261
262						Internal: run over each rows.
263
264						=cut
265						sub run ($) {
266	0		0	1		my($self) = @_;
267
268	0					my $read_fastpath_sub = $self->{_in}->fastpath_sub();
269	0					my $write_fastpath_sub = $self->{_out}->fastpath_sub();
270
271	0					my $empty = $self->{_empty};
272	0					my $enum_coli = undef;
273	0	0				if ($self->{_enumerate}) {
274	0					$enum_coli = $self->{_out}->col_to_i($self->{_destination_column});
275	0	0				croak $self->{_prog} . ": enumeration column " . $self->{_destion_column} . " doesn't exist, even though we created it.\n"
276						if (!defined($enum_coli));
277						};
278
279
280						my($loop) = q'{
281						my $fref;
282						while ($fref = &$read_fastpath_sub()) {
283						my @p = split(/' . quotemeta($self->{_elem_separator}) . '/, $fref->[' . $self->{_target_coli} . ']);
284						push(@p, undef) if ($#p == -1);
285						my($i) = 0;
286						foreach (@p) {
287						if (!defined($_) \|\| $_ eq "") {
288						' . (!defined($empty) ? "next;\n" : '$_ = ' . "'" . $empty . "';" ) . '
289						};
290	0	0				$fref->[' . $self->{_target_coli} . '] = $_;' .
		0
291						(defined($enum_coli) ? ' $fref->[' . $enum_coli . '] = $i++;' : '') . '
292						&$write_fastpath_sub($fref);
293						};
294						};
295						}';
296	0					eval $loop;
297	0	0				$@ && croak $self->{_prog} . ": interal eval error: $@.\ncode:\n$loop";
298						}
299
300
301						=head1 AUTHOR and COPYRIGHT
302
303						Copyright (C) 1991-2008 by John Heidemann
304
305						This program is distributed under terms of the GNU general
306						public license, version 2. See the file COPYING
307						with the distribution for details.
308
309						=cut
310
311						1;
312