File Coverage

blib/lib/Text/NumericData/App/txdfilter.pm
Criterion Covered Total %
statement 109 129 84.5
branch 40 70 57.1
condition 6 18 33.3
subroutine 9 9 100.0
pod 0 7 0.0
total 164 233 70.3


line stmt bran cond sub pod time code
1             package Text::NumericData::App::txdfilter;
2              
3 1     1   647 use Text::NumericData::App;
  1         3  
  1         30  
4              
5 1     1   5 use strict;
  1         2  
  1         1401  
6              
7             # This is just a placeholder because of a past build system bug.
8             # The one and only version for Text::NumericData is kept in
9             # the Text::NumericData module itself.
10             our $VERSION = '1';
11             $VERSION = eval $VERSION;
12              
13             #the infostring says it all
14             my $infostring = 'filter textual data files
15              
16             This program filters/transforms textual data files (pipe operation) concering the syntax and header stuff. The data itself is preserved. Any Parameters after options are file title and data column titles (overriding the named parameters).';
17              
18             our @ISA = ('Text::NumericData::App');
19              
20             sub new
21             {
22 1     1 0 105 my $class = shift;
23 1         15 my @pars =
24             (
25             'touchdata',1,'','touch the data lines (otherwise just copy them)',
26             'touchhead',1,'','touch the header (otherwise just copy)',
27             'newhead',0,'n','make completely new header',
28             'comment',[],'C','comment to include between file title and column titles, lines in array',
29             'headlines',undef,'H','use this fixed number of lines as header (overriding any heuristics)',
30             'delaftercom',0,'D','delete any comment lines after data',
31             'lhex','','L','regex for last header line (alternative to fdex)',
32             'fdex','','F','regex for first data line (alterative to lhex)',
33             'title',undef,'t','new title',
34             'coltitles',[],'i','new column titles (as in "title1","title2","title3")',
35             'modtitles',{},'m','modify existing titles... hash with column indices as key, in Perl: (1=>"NeSpalte",4=>"AndereSpalte")',
36             'origin',0,'o','create Origin-friendly format with tab separation and coltitles as only header line and NO comment character, triggers also quote=0 and delaftercom=1',
37             'data',1,'','include the data in printout',
38             'head',1,'','include the header in printout',
39             'history',0,'','keep old title(s) lines as historic comments (writing new overall title before, new column titles below), otherwise replace them',
40             'index',0,'x','Add a dataset index as first column (maybe just to make other tools happy so with the data lines not starting with text). You can influence the column name via modtitles.'
41             );
42              
43 1         19 return $class->SUPER::new
44             ({
45             parconf=>
46             {
47             info=>$infostring
48             # default copyright
49             # default version
50             }
51             ,pardef=>\@pars
52             ,pipemode=>1
53             ,pipe_init=>\&prepare
54             ,pipe_begin=>\&init
55             ,pipe_line=>\&process_line
56             ,pipe_end=>\&endhook
57             });
58             }
59              
60             sub prepare
61             {
62 7     7 0 17 my $self = shift;
63 7         11 my $param = $self->{param};
64              
65 7 100       21 if($param->{origin})
66             {
67 1         3 $param->{comchar} = '';
68 1         3 $param->{outsep} = "\t";
69 1         2 $param->{quote} = 0;
70 1         2 $param->{delaftercom} = 1;
71             }
72              
73             #plain command line parameters are file and column titles
74 7 100       11 $self->{title} = @{$self->{argv}} ? shift(@{$self->{argv}}) : $param->{title};
  7         22  
  2         10  
75 7 100       10 $self->{titles} = @{$self->{argv}} ? $self->{argv} : $param->{coltitles};
  7         22  
76             # precompile header/data matches
77 7 50       21 $self->{lhex} = qr/$param->{lhex}/ if $param->{lhex} ne '';
78 7 50       19 $self->{fdex} = qr/$param->{fdex}/ if $param->{fdex} ne '';
79              
80 7         21 return 0;
81             }
82              
83             sub init
84             {
85 7     7 0 566 my $self = shift;
86 7         39 $self->new_txd();
87             #storage for old header lines
88 7         19 $self->{headlines} = [];
89             #counter/switch
90 7         15 $self->{l} = 0;
91 7         24 $self->{lasthead} = 0;
92             }
93              
94             sub process_line
95             {
96 32     32 0 970 my $self = shift;
97 32         58 my $c = $self->{txd};
98 32         49 my $param = $self->{param};
99 32         53 my $pre = ''; # prepend text just before output
100              
101 32 100       79 if(!$self->{state}{data})
102             {
103             #maybe still in header
104 26         37 ++$self->{l};
105 26         43 my $is_data = 0;
106 26 50 33     72 if(defined $param->{headlines} and $self->{l} > $param->{headlines})
107             {
108 0         0 $is_data = 1;
109             } else {
110 26 50       85 $is_data = $self->{lasthead} ? 1 : $c->line_check($_[0]);
111             # Behaviour when both expressions are specified:
112             # The first one that triggers defines beginning of data part.
113             # An idea would be to intentionally skip a part between.
114             # Think about that ...
115 26 50 33     115 if(!$self->{lasthead} and defined $self->{lhex})
116             {
117 0         0 $is_data = 0;
118 0         0 $self->{lasthead} = $_[0] =~ $self->{lhex};
119             }
120 26 50       65 if(defined $self->{fdex})
121             {
122             # possibly overriding line_check
123 0         0 $is_data = $_[0] =~ $self->{fdex};
124             }
125             }
126             # End header on specified number of lines or when thinking that data was found.
127 26 100       52 if($is_data)
128             {
129             #first data line found
130 4         13 $self->{state}{data} = 1;
131              
132 4         9 $self->header_workout($pre);
133             }
134             else #collect headlines
135             {
136 22         52 my $h = $_[0];
137 22         77 $c->make_naked($h);
138 22         44 push(@{$self->{headlines}},$h);
  22         66  
139 22         58 $_[0] = '';
140             }
141             }
142 32 100       77 if($self->{state}{data})
143             {
144             # skip data section if so desired
145 10 50       22 unless($param->{data})
146             {
147 0         0 $_[0] = '';
148             }
149             else
150             {
151             # data line processing
152             # This logic is not nested right, code duplication has to go.
153 10 0       23 unless($param->{delaftercom})
    50          
154             {
155             # normal handling, repeat everything unless unworthy
156 10 50       39 if($_[0] eq $c->{config}{lineend})
    50          
157             {
158 0 0       0 $_[0] = '' if $param->{noempty};
159             }
160             elsif($param->{touchdata})
161             {
162 10         33 my $d = $c->line_data($_[0]);
163 0         0 unshift(@{$d}, ++$self->{index})
164 10 50       26 if $param->{index};
165 10         16 $_[0] = ${$c->data_line($d)};
  10         30  
166             }
167             }
168 0         0 elsif($c->line_check($_,1))
169             {
170 0         0 my $d = $c->line_data($_[0]);
171 0         0 unshift(@{$d}, ++$self->{index})
172 0 0       0 if $param->{index};
173 0         0 $_[0] = ${$c->data_line($d)}
174 0 0       0 if($param->{touchdata});
175             }
176 0         0 else{ $_[0] = ''; }
177             }
178             }
179 32 100       125 $_[0] = $pre.$_[0] if $pre ne '';
180             }
181              
182             sub endhook
183             {
184 7     7 0 176 my $self = shift;
185             # If there was no data, still produce a header when it was given via command line.
186 7 100       29 $self->header_workout(@_) unless($self->{state}{data});
187             }
188              
189             # generic helper for applying modtitles (actually, just numeric-key hash to array)
190             sub mod_titles
191             {
192 6     6 0 13 my ($t,$m) = @_;
193 6         10 foreach my $k (keys %{$m})
  6         20  
194             {
195 0 0       0 if($k =~ /^\d+$/)
196             {
197 0         0 $t->[$k-1] = $m->{$k};
198             }
199             }
200             }
201              
202             # construct header and prepend to current line, if demanded
203             sub header_workout
204             {
205 7     7 0 12 my $self = shift;
206 7         14 my $param = $self->{param};
207 7 50       16 return unless $param->{head};
208              
209             #now print header
210 7         13 my $c = $self->{txd};
211              
212 7 100       19 unless($param->{touchhead})
213             {
214 1         11 my $pre = join($c->get_end(), @{$self->{headlines}});
  1         43  
215 1 50       11 $pre .= $c->get_end() if $pre ne '';
216 1         5 $_[0] = $pre.$_[0];
217 1         4 return;
218             }
219              
220 6 100       16 $c->{title} = $self->{title} if defined $self->{title};
221 6 100       11 $c->{titles} = $self->{titles} if @{$self->{titles}};
  6         20  
222 0         0 unshift(@{$c->{titles}}, 'index')
223 6 0 33     15 if($param->{index} and defined $c->{titles} and @{$c->{titles}});
  0   33     0  
224 6         21 mod_titles($c->{titles}, $self->{param}{modtitles});
225 6         16 my $pre = '';
226 6 100       16 if($param->{origin})
227             {
228             #mangle it for Origin...
229             #Origin uses "titles" in first two lines as legend text
230             #since normally the same kind of data from different sources
231             #is identified in a plot via the legend, any comment text
232             #provided for the file is repeated here for every column
233             #looks senseless in file, makes sense in Origin
234             #print STDERR "TODO: revisit Origin format ... is this really the best way?\n";
235 1         14 my $titles = $c->{titles};
236 1         2 foreach my $com (@{$param->{comment}})
  1         4  
237             {
238 1         3 my @car = ();
239 1         2 for(my $i = 0; $i <= $#{$titles}; ++$i)
  3         8  
240             {
241 2         5 push(@car, $com);
242             }
243 1         3 $c->{titles} = \@car;
244 1         3 $pre .= ${$c->title_line()};
  1         3  
245             }
246             #the "real" title is still there after any comments
247 1         3 $c->{titles} = $titles;
248 1         2 $pre .= ${$c->title_line()};
  1         4  
249             }
250             else
251             {
252 5 50       14 my $old_comments = $param->{history} ? $self->{headlines} : $c->{comments};
253             #file title
254 5 50       12 if(defined $c->{title}){ $pre .= ${$c->comment_line($c->{title})}; }
  5         10  
  5         17  
255 5         10 foreach my $l (@{$param->{comment}})
  5         13  
256             {
257 1         2 $pre .= ${$c->comment_line($l)};
  1         3  
258             }
259             #old stuff
260 5 50       14 unless($param->{newhead})
261             {
262 5         10 foreach my $h (@{$old_comments})
  5         11  
263             {
264 6         8 $pre .= ${$c->comment_line($h)};
  6         19  
265             }
266             }
267             #only if new one is desired
268             # Is that logic complete?
269 5 0 33     8 $pre .= ${$c->title_line()} if (@{$c->{titles}} or @{$self->{titles}} or (keys %{$param->{modtitles}}));
  5   33     16  
  5         25  
  0         0  
  0         0  
270             }
271 6         26 $_[0] = $pre.$_[0];
272             }
273              
274              
275             1;