File Coverage

blib/lib/Perl6/Slurp.pm
Criterion Covered Total %
statement 101 102 99.0
branch 58 66 87.8
condition 17 21 80.9
subroutine 13 13 100.0
pod 0 2 0.0
total 189 204 92.6


line stmt bran cond sub pod time code
1             package Perl6::Slurp;
2              
3 13     13   407011 use warnings;
  13         29  
  13         418  
4 13     13   74 use strict;
  13         26  
  13         482  
5 13     13   324 use 5.008;
  13         47  
  13         700  
6 13     13   77 use Carp;
  13         17  
  13         1418  
7 13     13   82 use Scalar::Util 'refaddr';
  13         30  
  13         1935  
8              
9             our $VERSION = '0.051005';
10              
11             # Exports only the slurp() sub...
12             sub import {
13 13     13   71 no strict 'refs';
  13         22  
  13         10657  
14 13     13   118 *{caller().'::slurp'} = \&slurp;
  13         200  
15             }
16              
17             # Recognize mode arguments...
18             my $mode_pat = qr{
19             ^ \s* ( (?: < | \+< | \+>>? ) &? ) \s*
20             }x;
21              
22             # Recognize a mode followed by optional layer arguments...
23             my $mode_plus_layers = qr{
24             (?: $mode_pat | ^ \s* -\| \s* )
25             ( (?: :[^\W\d]\w* (?: \( .*? \) ?)? \s* )* )
26             \s*
27             \z
28             }x;
29              
30             # Is this a pure number???
31             sub is_pure_num {
32 26     26 0 119 return (~$_[0] & $_[0]) eq 0; # ~ acts differently for numbers and strings
33             }
34              
35             # The magic subroutine that does everything...
36             sub slurp {
37             # Are we in a useful context???
38 140     140 0 136959 my $list_context = wantarray;
39 140 100       614 croak "Useless use of &slurp in a void context"
40             unless defined $list_context;
41              
42             # Missing args default to $_, so we need to catch that early...
43 139         204 my $default = $_;
44              
45             # Remember any I/O layers and other options specified...
46 139         178 my @layers_or_options;
47              
48             # Process the argument list...
49 139         834 for (my $i=0; $i<@_; $i++) {
50             # Ignore non-reference args...
51 247 100       805 my $type = ref $_[$i] or next;
52              
53             # Hashes indicate extra layers; remove from @_, add them in sequence...
54 139 100       542 if ($type eq 'HASH') {
    100          
55 48         249 push @layers_or_options, splice @_, $i--, 1
56             }
57              
58             # Arrays also indicate extra layers; remove from @_, convert to hash
59             # form, and add them in sequence...
60             elsif ($type eq 'ARRAY') {
61             # Splice out the array and unpack it...
62 3         3 my @array = @{splice @_, $i--, 1};
  3         12  
63              
64             # Verify and convert each layer specified to a one-key hash...
65 3         14 while (@array) {
66 6         15 my ($layer, $value) = splice @array, 0, 2;
67 6 50       16 croak "Incomplete layer specification for :$layer",
68             "\n(did you mean: $layer=>1)\n "
69             unless $value;
70 6         29 push @layers_or_options, { $layer=>$value };
71             }
72             }
73             }
74              
75             # Any remaining args are the read mode, source file, and whatever...
76 139         295 my ($mode, $source, @args) = @_;
77              
78             # If no arguments, use defaults...
79 139 100       298 if (!defined $mode) {
80 6 50       20 $mode = defined $default ? $default
    100          
81             : @ARGV ? \*ARGV
82             : "<"
83             }
84              
85             # If mode was a reference, it must really have been the source...
86 139 100       1109 if (ref $mode) {
    100          
87 53         73 $source = $mode;
88 53         89 $mode = "<";
89             }
90              
91             # If mode isn't a valid mode, it must actually have been the source...
92             elsif ($mode !~ /$mode_plus_layers/x) {
93 25         75 $source = $mode;
94 25 100       193 $mode = $source =~ s/$mode_pat//x ? "$1"
    100          
95             : $source =~ s/ \| \s* $//x ? "-|"
96             : "<"
97             ;
98             }
99              
100             # Sources can be references, but only certain kinds of references...
101 139         216 my $ref = ref $source;
102 139 100       288 if ($ref) {
103             croak "Can't use $ref as a data source"
104             unless $ref eq 'SCALAR'
105             || $ref eq 'GLOB'
106 89 50 100     401 || eval { $source->isa('IO::Handle') };
  21   66     145  
107             }
108              
109             # slurp() always uses \n as its input record separator (a la Perl 6)
110 139         503 local $/ = "\n";
111              
112             # This track the various options slurp() allows...
113 139         262 my ($chomp, $chomp_to, $layers) = (0, "", "");
114              
115             # Can this slurp be done in an optimized way (assume so initially)???
116 139         165 my $optimized = 1;
117              
118             # Decode the layers and options...
119 139         176 my $IRS = "\n";
120 139         272 for (@layers_or_options) {
121             # Input record separator...
122 54 100       721 if (exists $_->{irs}) {
123 30         46 $IRS = $_->{irs};
124 30 100       79 $/ = $IRS if !ref($IRS);
125 30         56 delete $_->{irs};
126 30   66     649 $optimized &&= !ref($IRS); # ...can't be optimized if irs is a regex
127             }
128              
129             # Autochomp...
130 54 100       1473 if (exists $_->{chomp}) {
131 26         30 $chomp = 1;
132              
133             # If the chomp value is a string, that becomes to replacement...
134 26 100 66     87 if (defined $_->{chomp} && !is_pure_num($_->{chomp})) {
135 14         26 $chomp_to = $_->{chomp}
136             }
137 26         53 delete $_->{chomp};
138 26         31 $optimized = 0; # ...chomped slurps can't be optimized
139             }
140              
141             # Any other entries are layers...
142 54         227 $layers .= join " ", map ":$_", keys %$_;
143             }
144              
145             # Add any layers found to the mode specification...
146 139         262 $mode .= " $layers";
147              
148             # Open the source as a filehandle...
149 139         163 my $FH;
150              
151             # Source is a typeglob...
152 139 100 100     615 if ($ref && $ref ne 'SCALAR') {
    50          
153 43         69 $FH = $source;
154             }
155              
156             # No source, specified: use *ARGV...
157             elsif (!$source) {
158 13     13   82 no warnings 'io';
  13         25  
  13         926  
159 0 0       0 open $FH, '<-'
160             or croak "Can't open stdin: $!";
161             }
162              
163             # Source specified: open it...
164             else {
165 13     13   65 no warnings 'io';
  13         23  
  13         5758  
166 3 100   3   29 open $FH, $mode, $source, @args
  3         5  
  3         33  
  96         31652  
167             or croak "Can't open '$source': $!";
168             }
169              
170             # Standardize chomp-converter sub...
171 137 50   90   5490 my $chomp_into = ref $chomp_to eq 'CODE' ? $chomp_to : sub{ $chomp_to };
  90         282  
172              
173             # Optimized slurp if possible in list context...
174 137 100 100     1162 if ($list_context && $optimized) {
175 36         985 return <$FH>;
176             }
177              
178             # Acquire data (working around bug between $/ and in magic ARGV)...
179 101 100       629 my $data = refaddr($FH) == \*ARGV ? join("",<>) : do { local $/; <$FH> };
  100         556  
  100         5289  
180              
181             # Prepare input record separator regex...
182 101 100       1622 my $irs = ref($IRS) ? $IRS
    100          
183             : defined($IRS) ? qr{\Q$IRS\E}
184             : qr{(?!)};
185              
186             # List context may require input record separator processing...
187 101 100       308 if ($list_context) {
188             # No data --> nothing to return...
189 14 50       36 return () unless defined $data;
190              
191             # Split acquired data into lines according to IRS...
192 14         295 my @components = split /($irs)/, $data;
193 14         29 my @lines;
194 14         34 while (@components) {
195             # Extract the next line and separator...
196 56         109 my ($line, $sep) = splice @components, 0, 2;
197              
198             # Add the line...
199 56         80 push @lines, $line;
200              
201             # Chomp as requested...
202 56 100 66     229 if (defined $sep && length $sep) {
203 50 100       129 $lines[-1] .= $chomp ? $chomp_into->($sep) : $sep;
204             }
205             }
206 14         177 return @lines;
207             }
208              
209             # Scalar context...
210             else {
211             # No data --> nothing to return...
212 87 100       257 return q{} unless defined $data;
213              
214             # Otherwise, do any requested chomp-conversion...
215 78 100       227 if ($chomp) {
216 13         208 $data =~ s{($irs)}{$chomp_into->($1)}ge;
  45         74  
217             }
218              
219 78         1630 return $data;
220             }
221             }
222              
223             1;
224             __END__
225              
226              
227             =head1 NAME
228              
229             Perl6::Slurp - Implements the Perl 6 'slurp' built-in
230              
231              
232             =head1 SYNOPSIS
233              
234             use Perl6::Slurp;
235              
236             # Slurp a file by name...
237              
238             $file_contents = slurp 'filename';
239             $file_contents = slurp '<filename';
240             $file_contents = slurp '<', 'filename';
241             $file_contents = slurp '+<', 'filename';
242              
243              
244             # Slurp a file via an (already open!) handle...
245              
246             $file_contents = slurp \*STDIN;
247             $file_contents = slurp $filehandle;
248             $file_contents = slurp IO::File->new('filename');
249              
250              
251             # Slurp a string...
252              
253             $str_contents = slurp \$string;
254             $str_contents = slurp '<', \$string;
255              
256              
257             # Slurp a pipe (not on Windows, alas)...
258              
259             $str_contents = slurp 'tail -20 $filename |';
260             $str_contents = slurp '-|', 'tail', -20, $filename;
261              
262              
263             # Slurp with no source slurps from whatever $_ indicates...
264              
265             for (@files) {
266             $contents .= slurp;
267             }
268              
269             # ...or from the entire ARGV list, if $_ is undefined...
270              
271             $_ = undef;
272             $ARGV_contents = slurp;
273              
274              
275             # Specify I/O layers as part of mode...
276              
277             $file_contents = slurp '<:raw', $file;
278             $file_contents = slurp '<:utf8', $file;
279             $file_contents = slurp '<:raw :utf8', $file;
280              
281              
282             # Specify I/O layers as separate options...
283              
284             $file_contents = slurp $file, {raw=>1};
285             $file_contents = slurp $file, {utf8=>1};
286             $file_contents = slurp $file, {raw=>1}, {utf8=>1};
287             $file_contents = slurp $file, [raw=>1, utf8=>1];
288              
289              
290             # Specify input record separator...
291              
292             $file_contents = slurp $file, {irs=>"\n\n"};
293             $file_contents = slurp '<', $file, {irs=>"\n\n"};
294             $file_contents = slurp {irs=>"\n\n"}, $file;
295              
296              
297             # Input record separator can be regex...
298              
299             $file_contents = slurp $file, {irs=>qr/\n+/};
300             $file_contents = slurp '<', $file, {irs=>qr/\n+|\t{2,}};
301              
302              
303             # Specify autochomping...
304              
305             $file_contents = slurp $file, {chomp=>1};
306             $file_contents = slurp {chomp=>1}, $file;
307             $file_contents = slurp $file, {chomp=>1, irs=>"\n\n"};
308             $file_contents = slurp $file, {chomp=>1, irs=>qr/\n+/};
309              
310              
311             # Specify autochomping that replaces irs
312             # with another string...
313              
314             $file_contents = slurp $file, {irs=>"\n\n", chomp=>"\n"};
315             $file_contents = slurp $file, {chomp=>"\n\n"}, {irs=>qr/\n+/};
316              
317              
318             # Specify autochomping that replaces
319             # irs with a dynamically computed string...
320              
321             my $n = 1;
322             $file_contents = slurp $file, {chomp=>sub{ "\n#line ".$n++."\n"};
323              
324              
325             # Slurp in a list context...
326              
327             @lines = slurp 'filename';
328             @lines = slurp $filehandle;
329             @lines = slurp \$string;
330             @lines = slurp '<:utf8', 'filename', {irs=>"\x{2020}", chomp=>"\n"};
331              
332              
333             =head1 DESCRIPTION
334              
335             C<slurp> takes:
336              
337             =over
338              
339             =item *
340              
341             a filename,
342              
343             =item *
344              
345             a filehandle,
346              
347             =item *
348              
349             a typeglob reference,
350              
351             =item *
352              
353             an IO::File object, or
354              
355             =item *
356              
357             a scalar reference,
358              
359             =back
360              
361             converts it to an input stream (using C<open()> if necessary), and reads
362             in the entire stream. If C<slurp> fails to set up or read the stream, it
363             throws an exception.
364              
365             If no data source is specified C<slurp> uses the value of C<$_> as the
366             source. If C<$_> is undefined, C<slurp> uses the C<@ARGV> list,
367             and magically slurps the contents of I<all> the sources listed in C<@ARGV>.
368             Note that the same magic is also applied if you explicitly slurp <*ARGV>, so
369             the following three input operations:
370              
371             $contents = join "", <ARGV>;
372              
373             $contents = slurp \*ARGV;
374              
375             $/ = undef;
376             $contents = slurp;
377              
378             are identical in effect.
379              
380             In a scalar context C<slurp> returns the stream contents as a single string.
381             If the stream is at EOF, it returns an empty string.
382             In a list context, it splits the contents after the appropriate input
383             record separator and returns the resulting list of strings.
384              
385             You can set the input record separator (S<< C<< { irs => $your_irs_here}
386             >> >>) for the input operation. The separator can be specified as a
387             string or a regex. Note that an explicit input record separator has no
388             input-terminating effect in a scalar context; C<slurp> always
389             reads in the entire input stream, whatever the C<'irs'> value.
390              
391             In a list context, changing the separator can change how the input is
392             broken up within the list that is returned.
393              
394             If an input record separator is not explicitly specified, C<slurp>
395             defaults to C<"\n"> (I<not> to the current value of C<$/> E<ndash> since
396             Perl 6 doesn't I<have> a C<$/>);
397              
398             You can also tell C<slurp> to automagically C<chomp> the input as it is
399             read in, by specifying: (S<< C<< { chomp => 1 } >> >>)
400              
401             Better still, you can tell C<slurp> to automagically
402             C<chomp> the input and I<replace> what it chomps with another string,
403             by specifying: (S<< C<< { chomp => "another string" } >> >>)
404              
405             You can also tell C<slurp> to compute the replacement string on-the-fly
406             by specifying a subroutine as the C<chomp> value:
407             (S<< C<< { chomp => sub{...} } >> >>). This subroutine is passed the string
408             being chomped off, so for example you could squeeze single newlines to a
409             single space and multiple consecutive newlines to a two newlines with:
410              
411             sub squeeze {
412             my ($removed) = @_;
413             if ($removed =~ tr/\n/\n/ == 1) { return " " }
414             else { return "\n\n"; }
415             }
416              
417             print slurp(\*DATA, {irs=>qr/[ \t]*\n+/, chomp=>\&squeeze}), "\n";
418              
419             Which would transform:
420              
421             This is the
422             first paragraph
423              
424              
425             This is the
426             second
427             paragraph
428              
429             This, the
430             third
431              
432              
433              
434              
435             This one is
436             the
437             very
438             last
439              
440             to:
441              
442             This is the first paragraph
443              
444             This is the second paragraph
445              
446             This, the third
447              
448             This one is the very last
449              
450              
451             Autochomping works in both scalar and list contexts. In scalar contexts every
452             instance of the input record separator will be removed (or replaced) within
453             the returned string. In list context, each list item returned with its
454             terminating separator removed (or replaced).
455              
456             You can specify I/O layers, either using the Perl 5 notation:
457              
458             slurp "<:layer1 :layer2 :etc", $filename;
459              
460             or as an array of options:
461              
462             slurp $filename, [layer1=>1, layer2=>1, etc=>1];
463             slurp [layer1=>1, layer2=>1, etc=>1], $filename;
464              
465             or as individual options (each of which must be in a separate hash):
466              
467             slurp $filename, {layer1=>1}, {layer2=>1}, {etc=>1};
468             slurp {layer1=>1}, {layer2=>1}, {etc=>1}, $filename;
469              
470             (...which, of course, would look much cooler in Perl 6:
471              
472             # Perl 6 only :-(
473              
474             slurp $filename, :layer1 :layer2 :etc;
475             slurp :layer1 :layer2 :etc, $filename;
476              
477             )
478              
479             A common mistake is to put all the options together in one hash:
480              
481             slurp $filename, {layer1=>1, layer2=>1, etc=>1};
482              
483             This is almost always a disaster, since the order of I/O layers is usually
484             critical, and placing them all in one hash effectively randomizes that order.
485             Use an array instead:
486              
487             slurp $filename, [layer1=>1, layer2=>1, etc=>1];
488              
489              
490             =head1 WARNINGS
491              
492             The syntax and semantics of Perl 6 is still being finalized
493             and consequently is at any time subject to change. That means the
494             same caveat applies to this module.
495              
496             When called with a filename or piped shell command, C<slurp()> uses
497             Perl's built- in C<open()> to access the file. This means that it
498             is subject to the same platform-specific limitations as C<open()>.
499             For example, slurping from piped shell commands may not work
500             under Windows.
501              
502              
503             =head1 DEPENDENCIES
504              
505             Requires: Perl 5.8.0
506              
507              
508             =head1 AUTHOR
509              
510             Damian Conway (damian@conway.org)
511              
512              
513             =head1 COPYRIGHT
514              
515             Copyright (c) 2003-2012, Damian Conway. All Rights Reserved.
516             This module is free software. It may be used, redistributed
517             and/or modified under the same terms as Perl itself.