File Coverage

blib/lib/SuperSplit.pm
Criterion Covered Total %
statement 54 83 65.0
branch 15 44 34.0
condition 0 5 0.0
subroutine 12 17 70.5
pod 6 6 100.0
total 87 155 56.1


\n
line stmt bran cond sub pod time code
1             package SuperSplit;
2 1     1   711 use strict;
  1         1  
  1         86  
3              
4             =head1 NAME
5              
6             SuperSplit - Provides methods to split/join in two or more dimensions
7              
8             =head1 SYNOPSIS
9              
10             use SuperSplit ; #or qw/!:all supersplit/ |which function you want to use
11            
12             #first example: split on newlines and whitespace and print
13             #the same data joined on tabs and whitespace. The split works on STDIN
14             #
15             print superjoin( supersplit() ); #behaves like while (<>)
16             {s/\s+/\t/g;print;}
17            
18             #second: split a table in a text file, and join it to HTML
19             #
20             my $array2D = supersplit( \*INPUT ) #filehandle must be open
21             my $htmltable = superjoin( '</TD><TD>', "</TD></TR><TR><TD>",
22             $array2D );
23             $htmltable = "<TABLE>\n <TR>\n<TD>" . $htmltable .
24             "</TD></TR></TABLE>";
25             print $htmltable;
26            
27             #third: perl allows you to have varying number of columns in a row,
28             # so don't stop with simple tables. To split a piece of text into
29             # paragraphs, then words, try this:
30             #
31             undef $/;
32             $_ = <>;
33             tr/.!();:?/ /; #remove punctuation
34             my $array = supersplit( '\s+', '\n\s*\n', $_ );
35             # now you can do something nifty as counting the number of words in each
36             # paragraph
37             my $i = 0;
38             for my $rowref (@$array) {
39             print "Found ".@$rowref." \twords in paragraph \t".++$i."\n";
40             }
41            
42             #other uses:
43             $a = supersplit( 2 ); #behaves like supersplit(), but stops with the
44             second column
45             $b = supersplit_open( "<$file", 2 ); #as before, but opens $file for
46             input
47             $c = supersplit_open( "<$file"); #as before, but splits as much as it can
48             $d = supersplit_nolimit( 3); #Hopelessly tries to split on 3.
49             $e = supersplit_limits( [ ], [2,2] ); #$a, but returns 2x2 array
50             $f = supersplit_hashref( { separators => [ ], limits => [2,2],
51             filehandle => \*STDIN }); #as before, but using anonhash to determine
52             inputs
53              
54             =head1 DESCRIPTION
55              
56             Supersplit is just a consequence of the possibility to use
57             multi-dimensional
58             arrays in perl. Because that is possible, one also wants a way to
59             conveniently split data into a nD-array (at least I want to). And vice
60             versa, of course. Supersplit/join just do that.
61              
62             Because I intend to use these methods in numerous one-liners and in my
63             collection of handy filters, an object interface is more often than not
64             cumbersome. So, this module exports six methods 'super...', but no
65             variables or globs of any kind. If you think modules shouldn't export
66             functions, period, use the object interface, SuperSplit::Obj. TIMTOWTDI
67              
68             If you don't like input magic, you can use the hashref variant. It uses
69             only little of that ;-).
70              
71             =over 4
72              
73             =item supersplit( @separator-list, $filehandleref || $string, $limit);
74              
75             The first method, supersplit, returns a nD-array. To do that, it needs
76             data and the strings to split with. Data may be provided as a reference
77             to
78             a filehandle, or as a string. If you want to use a string for the data, you
79             MUST provide the strings to split with (>=3 argument mode). If you don't
80             provide data, supersplit works on STDIN. If you provide a filehandle
81             (like
82             \*INPUT), supersplit doesn't need the splitting strings, and
83             runs in 2D-mode by default. In both cases (STDIN or filehandle only) it
84             assumes columns are separated by whitespace, and rows are separated by
85             newlines. Strings are passed directly to split. If you provide more
86             separators, they will split the higher dimensions. If you only provide
87             one, it is treated like the column-separator, the row-separator defaults
88             to
89             newline.
90              
91             The separators are processed in reversed order, the last separator is
92             processed first. This is best explained with a simple whitespace
93             delimited
94             table:
95              
96             1 -1 4.32 new
97              
98             2 0 3.23 old
99              
100             3 -1 10.11 old
101              
102              
103             The default separator list, ('\s+', '\n') first splits on newlines,
104             resulting in three rows. Each row is then split on whitespace,
105             resulting in four columns in every row. The last element of the resulting
106             array is found by $array->[2][3] (indices start at zero).
107              
108             You may pass an optional last parameter that contains an integer only.
109             This is passed to split as the LIMIT parameter. See
110             L<perlfunc/split> for more details; it just limits the number of
111             times that split splits. The LIMIT parameter is only used in the last
112             dimension (aka, first delimiter). In case your string can be an
113             integer only (that means, no other characters present) and you have
114             more than two dimensions, you should use supersplit_nolimit, or
115             provide a bogus LIMIT like -1.
116              
117             A final remark on this function: It first tries to interpret your input
118             as
119             a filehandle and then as a string. Maybe you don't want that, if you are
120             using L for example. In that case, convert your object to a
121             string before passing it.
122              
123             Supersplit returns a multi-dimensional array or undef if an error
124             occurred.
125              
126             =item supersplit_nolimit
127              
128             Behaves like supersplit, except that it does not try to interpret the
129             last
130             parameter as the LIMIT parameter for split.
131              
132             =item supersplit_open
133              
134             Behaves like supersplit (including LIMIT behavior), except that it opens
135             the input string with open( INPUT, "$string" ). If that fails,
136             supersplit_open confesses, and it carps if INPUT turns out to be empty.
137             See L<perlfunc/open> for more details.
138              
139             =item supersplit_limits( $fh || $string, $separator_arrayref,
140             $limits_arrayref)
141              
142             Behaves like supersplit, but the separator list must be provided as a
143             reference to an array, just as the list with LIMITs. If the LIMIT list
144             has less members than the separator list, the last dimensions will be
145             called
146             without LIMIT. Both the separators and limits are popped, that is the
147             lists
148             will be processed from right to left, just like the separator list in
149             the previously described methods.
150              
151             This method can be used to parse tables that need a limit on
152             a higher dimension, I understand the .csv format is an example of that.
153              
154             =item supersplit_hashref( $hashref)
155              
156             This is just a wrapper around supersplit_limits. All arguments are passed
157             as members of the referenced hash. These members are: 'separators',
158             'limits',
159             'string', 'filehandle' and 'open'. The members 'separators' and 'limits'
160             must be
161             references to arrays. The method passes these references to
162             supersplit_limits,
163             see above for a description. For the other arguments, the method tries to
164             get 'string' first, then the 'filehandle' and if that fails tries to use
165             the 'open' member.
166              
167             =item superjoin( $colseparator, $rowseparator, $array2D );
168              
169             The sixth and last method, superjoin, takes a nD-array and returns it as
170             a
171             string. The default behavior assumes 2D-array. In the string, columns
172             (adjacent cells) are separated by the first argument provided. Rows
173             (normally lines) are separated by the second argument. Alternatively,
174             you
175             may give the 2D-array as the only argument. In that case, superjoin
176             joins
177             columns with a tab ("\t"), and rows with a newline ("\n"). If you have
178             more dimensions in your array, all separators for all dimensions should
179             be
180             provided. If you don't, superjoin stops at the second-last dimension.
181             Just as with supersplit, separators are processed in reversed order: the
182             last
183             separator/delimiter is processed first.
184              
185             Superjoin returns an undef if an error occurred, for example if you give a
186             ref to an hash. If your first dimension points to hashes or strings,
187             superjoin will return undef. Mixed arrays will break the code.
188              
189             =back
190              
191             =head1 AUTHOR
192              
193             Jeroen Elassaiss-Schaap, with great help from Ben Tilly, who rewrote most
194             of
195             the code for version 0.02.
196              
197             =head1 LICENSE
198              
199             Perl / Artistic license
200              
201             =head1 STATUS
202              
203             Alpha
204              
205             =cut
206              
207 1     1   5 use Exporter;
  1         2  
  1         41  
208 1     1   5 use vars qw( %EXPORT_TAGS @ISA $VERSION @limit);
  1         5  
  1         175  
209             $VERSION = 0.06;
210             @ISA = qw( Exporter );
211             %EXPORT_TAGS = (
212             all => [ qw( supersplit superjoin supersplit_open supersplit_nolimit
213             supersplit_limits supersplit_hashref)],
214             standard => [ 'all' ],
215             minimal => [ qw( supersplit superjoin ) ]
216             );
217             Exporter::export_ok_tags('all');
218             Exporter::export_tags('all');
219             @limit = ();
220 1     1   5 use Carp;
  1         2  
  1         718  
221              
222             sub supersplit{
223 1     1 1 71 @_ = _limit( @_);
224 1         5 my $text = _text( pop );
225 1         4 _supersplit( @_, $text);
226             }
227              
228             sub supersplit_open{
229 0     0 1 0 @_ = _limit( @_);
230 0         0 my $text = _open( pop );
231 0         0 _supersplit( @_, $text);
232             }
233              
234             sub supersplit_nolimit{
235 0     0 1 0 my $text = _text( pop);
236 0         0 _supersplit( @_, $text);
237             }
238              
239             sub supersplit_limits{
240 0     0 1 0 my $limit_array = pop;
241 0 0       0 return undef unless( ref( $limit_array) eq 'ARRAY' );
242 0         0 @limit = @$limit_array;
243 0         0 my $separator_array = pop;
244 0 0       0 return undef unless( ref( $separator_array) eq 'ARRAY' );
245 0         0 supersplit_nolimit( @$separator_array, @_);
246             }
247              
248             sub supersplit_hashref{
249 0     0 1 0 my $input = shift;
250 0 0       0 return undef unless( ref( $input) eq 'HASH' );
251 0 0       0 my $limit_array = $input->{ limits } or return undef;
252 0 0       0 my $separator_array = $input->{ separators } or return undef;
253 0         0 my $string;
254 0         0 for (1) {
255             ($string = $input->{ string } and last)
256 0 0 0     0 if $input->{ string };
257             ($string = _text( $input->{ filehandle }), last)
258 0 0       0 if $input->{ filehandle };
259 0 0       0 ($string = _open( $input->{ 'open' } ), last)
260             if $input->{ 'open' };
261             }
262 0         0 supersplit_limits( $string, $separator_array, $limit_array);
263             }
264              
265             sub _supersplit{
266 1     1   3 my $text = pop;
267 1 50       4 $_[0] || ( $_[0] = '\s+' );
268 1 50       4 $_[1] || ( $_[1] = '\n' );
269 1         4 _split( @_, $text );
270             }
271              
272             sub _text{
273 1     1   2 my $fh = pop;
274 1 50       3 unless (defined($fh)) {
275 0         0 $fh = \*STDIN;
276             }
277 1     1   6 no strict;
  1         2  
  1         640  
278 1 50       3 do{ local $/ = undef; join '', <$fh>; } || $fh;
  1         5  
  1         20  
279             }
280              
281             sub _split{
282 3     3   7 my $text = pop;
283 3         5 my $limit = $limit[ $#_ ];
284 3         4 my $re = pop;
285 3         4 my @res;
286 3 50       32 @res = scalar( @limit) ? split( $re, $text, $limit) :
287             split( $re, $text );
288 3 100       19 if (@_) {
289 1         3 @res = map { _split( @_, $_) } @res;
  2         6  
290             }
291 3         12 \@res;
292             }
293              
294             sub _limit{
295 1     1   3 local $_ = $_[$#_];
296 1 50       8 @limit = (pop) if m/^-?\d+$/s;
297 1 50       5 if (scalar( @limit))
298             {
299 0         0 for ( @_[0..($#_ - 1)] )
300             {
301 0         0 push( @limit, undef);
302             }
303             }
304 1         4 @_;
305             }
306              
307             sub _open{
308 0     0   0 my $str = pop;
309 0   0     0 open INPUT, "$str" || confess "Could not open $str";
310 0         0 my $text = join '', ;
311 0         0 close INPUT;
312 0 0       0 $text || carp "Opening $str did not result in any data";
313             }
314              
315             sub superjoin{
316 1     1 1 5 my $array_ref = pop;
317 1 50       8 push ( @_, "\t") if @_ < 1;
318 1 50       4 push ( @_, "\n") if @_ < 2;
319 1 50       5 return undef unless( ref( $array_ref ) eq 'ARRAY' );
320 1 50       7 return undef unless( ref( $array_ref->[0] ) =~ /ARRAY/ );
321 1         2 my @newarray = map{ [ @$_ ] } @$array_ref;
  2         9  
322 1         4 _join( @_, \@newarray);
323             }
324              
325             sub _join{
326 3     3   5 my $array_ref = pop;
327 3         4 my $str = pop;
328 3 100       94 if (@_) {
329 1         3 @$array_ref = map {_join( @_, $_)} @$array_ref;
  2         6  
330             }
331 3         18 join $str, @$array_ref;
332             }
333              
334             1;
335