File Coverage

blib/lib/Fsdb/Filter/cgi_to_db.pm
Criterion Covered Total %
statement 18 91 19.7
branch 0 28 0.0
condition 0 3 0.0
subroutine 6 18 33.3
pod 5 5 100.0
total 29 145 20.0


line stmt bran cond sub pod time code
1             #!/usr/bin/perl
2              
3             #
4             # cgi_to_db.pm
5             # Copyright (C) 1998-2007 by John Heidemann
6             #
7             # This program is free software; you can redistribute it and/or
8             # modify it under the terms of the GNU General Public License,
9             # version 2, as published by the Free Software Foundation.
10             #
11             # This program is distributed in the hope that it will be useful,
12             # but WITHOUT ANY WARRANTY; without even the implied warranty of
13             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14             # GNU General Public License for more details.
15             #
16             # You should have received a copy of the GNU General Public License along
17             # with this program; if not, write to the Free Software Foundation, Inc.,
18             # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19             #
20              
21             package Fsdb::Filter::cgi_to_db;
22              
23             =head1 NAME
24              
25             cgi_to_db - convert stored CGI files (from CGI.pm) to fsdb
26              
27             =head1 SYNOPSIS
28              
29             cgi_to_db [-duU] [-e EmptyValue] [cgi-files...]
30              
31             =head1 DESCRIPTION
32              
33             Converts all stored CGI files (from CGI.pm) to fsdb,
34             optionally unescaping the contents.
35             When contents are unescaped, CR NL is recoded as ``\n''.
36              
37             Output is always in fsdb list format with double space (type ``S'')
38             field separator.
39              
40             Unlike most Fsdb programs, the input to this program is
41             I usually from standard input. However, the program will take
42             C<-i> options.
43              
44             This program requires temporary storage equal to the size of the data
45             (so that it can handle the case of different entries having different
46             headers).
47              
48             =head1 OPTIONS
49              
50             =over 4
51              
52             =item B<-u> or B<--unescape>
53              
54             do unescape data, converting CGI escape codes like %xx
55             to regular characters (default)
56              
57             =item B<-U> or B<--nounescape>
58              
59             do I unescape data, but leave it CGI-encoded
60              
61             =item B<-e E> or B<--empty E>
62              
63             give value E as the value for empty (null) records
64              
65             =item B<-T TmpDir>
66              
67             where to put tmp files.
68             Also uses environment variable TMPDIR, if -T is
69             not specified.
70             Default is /tmp.
71              
72             =back
73              
74             =for comment
75             begin_standard_fsdb_options
76              
77             This module also supports the standard fsdb options:
78              
79             =over 4
80              
81             =item B<-d>
82              
83             Enable debugging output.
84              
85             =item B<-i> or B<--input> InputSource
86              
87             Read from InputSource, typically a file name, or C<-> for standard input,
88             or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects.
89              
90             =item B<-o> or B<--output> OutputDestination
91              
92             Write to OutputDestination, typically a file name, or C<-> for standard output,
93             or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects.
94              
95             =item B<--autorun> or B<--noautorun>
96              
97             By default, programs process automatically,
98             but Fsdb::Filter objects in Perl do not run until you invoke
99             the run() method.
100             The C<--(no)autorun> option controls that behavior within Perl.
101              
102             =item B<--help>
103              
104             Show help.
105              
106             =item B<--man>
107              
108             Show full manual.
109              
110             =back
111              
112             =for comment
113             end_standard_fsdb_options
114              
115              
116             =head1 SAMPLE USAGE
117              
118             =head2 Input:
119              
120             file A (TEST/cgi_to_db_ex.in):
121              
122             name=test
123             id=111-11-1111
124             email=test%40usc.edu
125             submit_time=Tue%20Jan%2014%2011%3A32%3A39%202003
126             =
127              
128             file B (TEST/cgi_to_db_ex.in-2):
129              
130             name=test2
131             id=222-22-2222
132             email=test2%40usc.edu
133             newfield=foo
134             emptyfield=
135             submit_time=Tue%20Jan%2024%2022%3A32%3A39%202003
136             =
137              
138              
139             =head2 Command:
140              
141             cgi_to_db TEST/cgi_to_db_ex.in TEST/cgi_to_db_ex.in-2
142              
143              
144             =head2 Output:
145              
146             #fsdb -R C -F S name id email submit_time newfield emptyfield
147             name: test
148             id: 111-11-1111
149             email: test\@usc.edu
150             submit_time: Tue Jan 14 11:32:39 2003
151              
152             name: test2
153             id: 222-22-2222
154             email: test2\@usc.edu
155             newfield: foo
156             emptyfield: -
157             submit_time: Tue Jan 24 22:32:39 2003
158              
159             # | cgi_to_db TEST/cgi_to_db_ex.in TEST/cgi_to_db_ex.in-2
160              
161              
162             =head1 SEE ALSO
163              
164             L.
165             L.
166             L.
167             L
168              
169             =head1 CLASS FUNCTIONS
170              
171             =cut
172              
173             @ISA = qw(Fsdb::Filter);
174             $VERSION = 2.0;
175              
176 1     1   4365 use strict;
  1         1  
  1         23  
177 1     1   3 use Pod::Usage;
  1         1  
  1         61  
178 1     1   3 use Carp;
  1         2  
  1         36  
179              
180 1     1   3 use Fsdb::Filter;
  1         1  
  1         13  
181 1     1   2 use Fsdb::IO::Writer;
  1         1  
  1         14  
182 1     1   3 use Fsdb::Support::NamedTmpfile;
  1         1  
  1         812  
183              
184              
185             =head2 new
186              
187             $filter = new Fsdb::Filter::cgi_to_db(@arguments);
188              
189             Create a new cgi_to_db object, taking command-line arguments.
190              
191             =cut
192              
193             sub new ($@) {
194 0     0 1   my $class = shift @_;
195 0           my $self = $class->SUPER::new(@_);
196 0           bless $self, $class;
197 0           $self->set_defaults;
198 0           $self->parse_options(@_);
199 0           $self->SUPER::post_new();
200 0           return $self;
201             }
202              
203              
204             =head2 set_defaults
205              
206             $filter->set_defaults();
207              
208             Internal: set up defaults.
209              
210             =cut
211              
212             sub set_defaults ($) {
213 0     0 1   my($self) = @_;
214 0           $self->SUPER::set_defaults();
215 0           $self->{_unescape} = 1;
216 0           $self->{_save_in_filename} = undef;
217 0           $self->set_default_tmpdir;
218             }
219              
220             =head2 parse_options
221              
222             $filter->parse_options(@ARGV);
223              
224             Internal: parse command-line arguments.
225              
226             =cut
227              
228             sub parse_options ($@) {
229 0     0 1   my $self = shift @_;
230              
231 0           my(@argv) = @_;
232             $self->get_options(
233             \@argv,
234 0     0     'help|?' => sub { pod2usage(1); },
235 0     0     'man' => sub { pod2usage(-verbose => 2); },
236             'autorun!' => \$self->{_autorun},
237             'd|debug+' => \$self->{_debug},
238             'e|empty=s' => \$self->{_empty},
239 0     0     'i|input=s@' => sub { $self->parse_io_option('inputs', @_); },
240       0     'inputs' => sub {}, # for compatibility with dbmerge, but here --inputs is implicit
241             'log!' => \$self->{_logprog},
242 0     0     'o|output=s' => sub { $self->parse_io_option('output', @_); },
243             'u|unescape!' => \$self->{_unescape},
244             'T|tmpdir|tempdir=s' => \$self->{_tmpdir},
245 0     0     'U' => sub { $self->{_unescape} = undef; },
246 0     0     '<>' => sub { $self->parse_io_option('inputs', @_); },
247 0 0         ) or pod2usage(2);
248             }
249              
250             =head2 setup
251              
252             $filter->setup();
253              
254             Internal: setup, parse headers.
255              
256             =cut
257              
258             sub setup ($) {
259 0     0 1   my($self) = @_;
260              
261             # Sigh, can't do anything, because multiple inputs and no clue about output format.
262             }
263              
264             =head2 run
265              
266             $filter->run();
267              
268             Internal: run over each rows.
269              
270             =cut
271             sub run ($) {
272 0     0 1   my($self) = @_;
273              
274             #
275             # Scan all the input, saving it, so we know the column names.
276             # Note the tmpfile is NOT in fsdb format.
277             #
278 0           $self->{_save_in_filename} = Fsdb::Support::NamedTmpfile::alloc($self->{_tmpdir});
279 0 0         open (TMP, ">$self->{_save_in_filename}") or croak $self->{_prog} . ": cannot write to " . $self->{_save_in_filename} . "\n";
280 0           my %columns_found;
281             my @columns_ordered;
282 0           foreach my $fn (@{$self->{_inputs}}) {
  0            
283 0 0         open(IN, "<$fn") or croak $self->{_prog} . ": cannot open input file $fn\n";
284 0           my $at_end_of_record = 1;
285 0           while () {
286 0           print TMP $_;
287 0           chomp;
288 0 0         if ($_ eq '=') {
289 0           $at_end_of_record = 1;
290 0           next;
291             };
292 0           $at_end_of_record = 0;
293 0           my($key, $value) = m/^([^=]*)=(.*)$/;
294 0 0         croak $self->{_prog} . " missing key in $_ in file $fn" if (!defined($key));
295 0 0         next if (defined($columns_found{$key}));
296             # new one!
297 0           $columns_found{$key} = 1;
298 0           push(@columns_ordered, $key);
299             };
300 0           close IN;
301 0 0         print TMP "=\n" if (!$at_end_of_record);
302             };
303 0           close TMP;
304              
305             #
306             # Now go back and do the real output.
307             #
308 0           $self->finish_io_option('output', -fscode => 'S', -rscode => 'C',
309             -cols => \@columns_ordered);
310 0 0         open (TMP, "<$self->{_save_in_filename}") or croak $self->{_prog} . ": cannot read from " . $self->{_save_in_filename} . "\n";
311 0           my %row;
312 0           my $at_end_of_record = 1;
313 0           my $unescape = $self->{_unescape};
314 0           while () {
315 0           chomp;
316 0 0         if ($_ eq '=') {
317 0           $self->{_out}->write_row_from_href(\%row);
318 0           %row = ();
319 0           $at_end_of_record = 1;
320 0           next;
321             };
322 0           $at_end_of_record = 0;
323 0           my($key, $value) = m/^([^=]*)=(.*)$/;
324 0 0         croak $self->{_prog} . ": interal error, empty $key." if (!defined($key));
325 0 0         croak $self->{_prog} . ": interal error, found key $key in second pass." if (!defined($columns_found{$key}));
326              
327             #
328             # deal with the value
329             #
330 0 0 0       $value = $self->{_empty} if (!defined($value) || $value eq '');
331 0 0         if ($unescape) {
332             # map newlines to something
333 0           $value =~ s/%0D%0A/%0A/g; # change CR NL to just NL
334 0           $value =~ s/%0A/ \\n /g; # change NL to my thing
335 0           $value =~ s/%09/ /g; # tabs to spaces
336 0           $value =~ s/%(..)/chr(hex($1))/eg; # now general unescape
  0            
337             };
338 0           $value =~ s/ +/ /g; # prune double spaces (for -FS option)
339 0           $value =~ s/\r//g; # second check on CRs
340 0           $row{$key} = $value;
341             };
342 0 0         croak $self->{_prog} . ": internal error, tmpfile finished in middle of record.\n" if (!$at_end_of_record);
343 0           close TMP;
344             }
345              
346             =head1 AUTHOR and COPYRIGHT
347              
348             Copyright (C) 1991-2008 by John Heidemann
349              
350             This program is distributed under terms of the GNU general
351             public license, version 2. See the file COPYING
352             with the distribution for details.
353              
354             =cut
355              
356             1;