File Coverage

blib/lib/Text/NumericData/App/txdfilter.pm
Criterion Covered Total %
statement 109 129 84.5
branch 40 70 57.1
condition 6 18 33.3
subroutine 9 9 100.0
pod 0 7 0.0
total 164 233 70.3


line stmt bran cond sub pod time code
1             package Text::NumericData::App::txdfilter;
2              
3 1     1   573 use Text::NumericData::App;
  1         2  
  1         39  
4              
5 1     1   5 use strict;
  1         5  
  1         1520  
6              
7             # This is just a placeholder because of a past build system bug.
8             # The one and only version for Text::NumericData is kept in
9             # the Text::NumericData module itself.
10             our $VERSION = '1';
11             $VERSION = eval $VERSION;
12              
13             #the infostring says it all
14             my $infostring = 'filter textual data files
15              
16             This program filters/transforms textual data files (pipe operation) concering the syntax and header stuff. The data itself is preserved. Any Parameters after options are file title and data column titles (overriding the named parameters).';
17              
18             our @ISA = ('Text::NumericData::App');
19              
20             sub new
21             {
22 1     1 0 88 my $class = shift;
23 1         13 my @pars =
24             (
25             'touchdata',1,'','touch the data lines (otherwise just copy them)',
26             'touchhead',1,'','touch the header (otherwise just copy)',
27             'newhead',0,'n','make completely new header',
28             'comment',[],'C','comment to include between file title and column titles, lines in array',
29             'headlines',undef,'H','use this fixed number of lines as header (overriding any heuristics)',
30             'delaftercom',0,'D','delete any comment lines after data',
31             'lhex','','L','regex for last header line (alternative to fdex)',
32             'fdex','','F','regex for first data line (alterative to lhex)',
33             'title',undef,'t','new title',
34             'coltitles',[],'i','new column titles (as in "title1","title2","title3")',
35             'modtitles',{},'m','modify existing titles... hash with column indices as key, in Perl: (1=>"NeSpalte",4=>"AndereSpalte")',
36             'origin',0,'o','create Origin-friendly format with tab separation and coltitles as only header line and NO comment character, triggers also quote=0 and delaftercom=1',
37             'data',1,'','include the data in printout',
38             'head',1,'','include the header in printout',
39             'history',0,'','keep old title(s) lines as historic comments (writing new overall title before, new column titles below), otherwise replace them',
40             'index',0,'x','Add a dataset index as first column (maybe just to make other tools happy so with the data lines not starting with text). You can influence the column name via modtitles.'
41             );
42              
43 1         14 return $class->SUPER::new
44             ({
45             parconf=>
46             {
47             info=>$infostring
48             # default copyright
49             # default version
50             }
51             ,pardef=>\@pars
52             ,pipemode=>1
53             ,pipe_init=>\&prepare
54             ,pipe_begin=>\&init
55             ,pipe_line=>\&process_line
56             ,pipe_end=>\&endhook
57             });
58             }
59              
60             sub prepare
61             {
62 7     7 0 12 my $self = shift;
63 7         11 my $param = $self->{param};
64              
65 7 100       18 if($param->{origin})
66             {
67 1         3 $param->{comchar} = '';
68 1         2 $param->{outsep} = "\t";
69 1         3 $param->{quote} = 0;
70 1         2 $param->{delaftercom} = 1;
71             }
72              
73             #plain command line parameters are file and column titles
74 7 100       11 $self->{title} = @{$self->{argv}} ? shift(@{$self->{argv}}) : $param->{title};
  7         17  
  2         6  
75 7 100       11 $self->{titles} = @{$self->{argv}} ? $self->{argv} : $param->{coltitles};
  7         19  
76             # precompile header/data matches
77 7 50       20 $self->{lhex} = qr/$param->{lhex}/ if $param->{lhex} ne '';
78 7 50       17 $self->{fdex} = qr/$param->{fdex}/ if $param->{fdex} ne '';
79              
80 7         16 return 0;
81             }
82              
83             sub init
84             {
85 7     7 0 557 my $self = shift;
86 7         31 $self->new_txd();
87             #storage for old header lines
88 7         16 $self->{headlines} = [];
89             #counter/switch
90 7         12 $self->{l} = 0;
91 7         21 $self->{lasthead} = 0;
92             }
93              
94             sub process_line
95             {
96 32     32 0 784 my $self = shift;
97 32         64 my $c = $self->{txd};
98 32         53 my $param = $self->{param};
99 32         49 my $pre = ''; # prepend text just before output
100              
101 32 100       74 if(!$self->{state}{data})
102             {
103             #maybe still in header
104 26         40 ++$self->{l};
105 26         39 my $is_data = 0;
106 26 50 33     70 if(defined $param->{headlines} and $self->{l} > $param->{headlines})
107             {
108 0         0 $is_data = 1;
109             } else {
110 26 50       114 $is_data = $self->{lasthead} ? 1 : $c->line_check($_[0]);
111             # Behaviour when both expressions are specified:
112             # The first one that triggers defines beginning of data part.
113             # An idea would be to intentionally skip a part between.
114             # Think about that ...
115 26 50 33     112 if(!$self->{lasthead} and defined $self->{lhex})
116             {
117 0         0 $is_data = 0;
118 0         0 $self->{lasthead} = $_[0] =~ $self->{lhex};
119             }
120 26 50       54 if(defined $self->{fdex})
121             {
122             # possibly overriding line_check
123 0         0 $is_data = $_[0] =~ $self->{fdex};
124             }
125             }
126             # End header on specified number of lines or when thinking that data was found.
127 26 100       52 if($is_data)
128             {
129             #first data line found
130 4         47 $self->{state}{data} = 1;
131              
132 4         14 $self->header_workout($pre);
133             }
134             else #collect headlines
135             {
136 22         39 my $h = $_[0];
137 22         64 $c->make_naked($h);
138 22         42 push(@{$self->{headlines}},$h);
  22         53  
139 22         44 $_[0] = '';
140             }
141             }
142 32 100       71 if($self->{state}{data})
143             {
144             # skip data section if so desired
145 10 50       21 unless($param->{data})
146             {
147 0         0 $_[0] = '';
148             }
149             else
150             {
151             # data line processing
152             # This logic is not nested right, code duplication has to go.
153 10 0       22 unless($param->{delaftercom})
    50          
154             {
155             # normal handling, repeat everything unless unworthy
156 10 50       33 if($_[0] eq $c->{config}{lineend})
    50          
157             {
158 0 0       0 $_[0] = '' if $param->{noempty};
159             }
160             elsif($param->{touchdata})
161             {
162 10         27 my $d = $c->line_data($_[0]);
163 0         0 unshift(@{$d}, ++$self->{index})
164 10 50       29 if $param->{index};
165 10         16 $_[0] = ${$c->data_line($d)};
  10         23  
166             }
167             }
168 0         0 elsif($c->line_check($_,1))
169             {
170 0         0 my $d = $c->line_data($_[0]);
171 0         0 unshift(@{$d}, ++$self->{index})
172 0 0       0 if $param->{index};
173 0         0 $_[0] = ${$c->data_line($d)}
174 0 0       0 if($param->{touchdata});
175             }
176 0         0 else{ $_[0] = ''; }
177             }
178             }
179 32 100       101 $_[0] = $pre.$_[0] if $pre ne '';
180             }
181              
182             sub endhook
183             {
184 7     7 0 165 my $self = shift;
185             # If there was no data, still produce a header when it was given via command line.
186 7 100       25 $self->header_workout(@_) unless($self->{state}{data});
187             }
188              
189             # generic helper for applying modtitles (actually, just numeric-key hash to array)
190             sub mod_titles
191             {
192 6     6 0 59 my ($t,$m) = @_;
193 6         11 foreach my $k (keys %{$m})
  6         31  
194             {
195 0 0       0 if($k =~ /^\d+$/)
196             {
197 0         0 $t->[$k-1] = $m->{$k};
198             }
199             }
200             }
201              
202             # construct header and prepend to current line, if demanded
203             sub header_workout
204             {
205 7     7 0 12 my $self = shift;
206 7         11 my $param = $self->{param};
207 7 50       20 return unless $param->{head};
208              
209             #now print header
210 7         9 my $c = $self->{txd};
211              
212 7 100       16 unless($param->{touchhead})
213             {
214 1         6 my $pre = join($c->get_end(), @{$self->{headlines}});
  1         40  
215 1 50       13 $pre .= $c->get_end() if $pre ne '';
216 1         5 $_[0] = $pre.$_[0];
217 1         3 return;
218             }
219              
220 6 100       15 $c->{title} = $self->{title} if defined $self->{title};
221 6 100       9 $c->{titles} = $self->{titles} if @{$self->{titles}};
  6         17  
222 0         0 unshift(@{$c->{titles}}, 'index')
223 6 0 33     14 if($param->{index} and defined $c->{titles} and @{$c->{titles}});
  0   33     0  
224 6         19 mod_titles($c->{titles}, $self->{param}{modtitles});
225 6         14 my $pre = '';
226 6 100       15 if($param->{origin})
227             {
228             #mangle it for Origin...
229             #Origin uses "titles" in first two lines as legend text
230             #since normally the same kind of data from different sources
231             #is identified in a plot via the legend, any comment text
232             #provided for the file is repeated here for every column
233             #looks senseless in file, makes sense in Origin
234             #print STDERR "TODO: revisit Origin format ... is this really the best way?\n";
235 1         3 my $titles = $c->{titles};
236 1         2 foreach my $com (@{$param->{comment}})
  1         3  
237             {
238 1         3 my @car = ();
239 1         3 for(my $i = 0; $i <= $#{$titles}; ++$i)
  3         8  
240             {
241 2         4 push(@car, $com);
242             }
243 1         3 $c->{titles} = \@car;
244 1         2 $pre .= ${$c->title_line()};
  1         4  
245             }
246             #the "real" title is still there after any comments
247 1         3 $c->{titles} = $titles;
248 1         2 $pre .= ${$c->title_line()};
  1         3  
249             }
250             else
251             {
252 5 50       10 my $old_comments = $param->{history} ? $self->{headlines} : $c->{comments};
253             #file title
254 5 50       12 if(defined $c->{title}){ $pre .= ${$c->comment_line($c->{title})}; }
  5         8  
  5         16  
255 5         10 foreach my $l (@{$param->{comment}})
  5         12  
256             {
257 1         2 $pre .= ${$c->comment_line($l)};
  1         3  
258             }
259             #old stuff
260 5 50       15 unless($param->{newhead})
261             {
262 5         8 foreach my $h (@{$old_comments})
  5         11  
263             {
264 6         8 $pre .= ${$c->comment_line($h)};
  6         12  
265             }
266             }
267             #only if new one is desired
268             # Is that logic complete?
269 5 0 33     8 $pre .= ${$c->title_line()} if (@{$c->{titles}} or @{$self->{titles}} or (keys %{$param->{modtitles}}));
  5   33     14  
  5         14  
  0         0  
  0         0  
270             }
271 6         23 $_[0] = $pre.$_[0];
272             }
273              
274              
275             1;