File Coverage

blib/lib/Perl6/Slurp.pm

Criterion	Covered	Total	%
statement	101	102	99.0
branch	58	66	87.8
condition	17	21	80.9
subroutine	13	13	100.0
pod	0	2	0.0
total	189	204	92.6

line	stmt	bran	cond	sub	pod	time	code
1							package Perl6::Slurp;
2
3	13			13		407011	use warnings;
	13					29
	13					418
4	13			13		74	use strict;
	13					26
	13					482
5	13			13		324	use 5.008;
	13					47
	13					700
6	13			13		77	use Carp;
	13					17
	13					1418
7	13			13		82	use Scalar::Util 'refaddr';
	13					30
	13					1935
8
9							our $VERSION = '0.051005';
10
11							# Exports only the slurp() sub...
12							sub import {
13	13			13		71	no strict 'refs';
	13					22
	13					10657
14	13			13		118	*{caller().'::slurp'} = \&slurp;
	13					200
15							}
16
17							# Recognize mode arguments...
18							my $mode_pat = qr{
19							^ \s* ( (?: < \| \+< \| \+>>? ) &? ) \s*
20							}x;
21
22							# Recognize a mode followed by optional layer arguments...
23							my $mode_plus_layers = qr{
24							(?: $mode_pat \| ^ \s* -\\| \s* )
25							( (?: :[^\W\d]\w* (?: $ .? $ ?)? \s )* )
26							\s*
27							\z
28							}x;
29
30							# Is this a pure number???
31							sub is_pure_num {
32	26			26	0	119	return (~$_[0] & $_[0]) eq 0; # ~ acts differently for numbers and strings
33							}
34
35							# The magic subroutine that does everything...
36							sub slurp {
37							# Are we in a useful context???
38	140			140	0	136959	my $list_context = wantarray;
39	140	100				614	croak "Useless use of &slurp in a void context"
40							unless defined $list_context;
41
42							# Missing args default to $_, so we need to catch that early...
43	139					204	my $default = $_;
44
45							# Remember any I/O layers and other options specified...
46	139					178	my @layers_or_options;
47
48							# Process the argument list...
49	139					834	for (my $i=0; $i<@_; $i++) {
50							# Ignore non-reference args...
51	247	100				805	my $type = ref $_[$i] or next;
52
53							# Hashes indicate extra layers; remove from @_, add them in sequence...
54	139	100				542	if ($type eq 'HASH') {
		100
55	48					249	push @layers_or_options, splice @_, $i--, 1
56							}
57
58							# Arrays also indicate extra layers; remove from @_, convert to hash
59							# form, and add them in sequence...
60							elsif ($type eq 'ARRAY') {
61							# Splice out the array and unpack it...
62	3					3	my @array = @{splice @_, $i--, 1};
	3					12
63
64							# Verify and convert each layer specified to a one-key hash...
65	3					14	while (@array) {
66	6					15	my ($layer, $value) = splice @array, 0, 2;
67	6	50				16	croak "Incomplete layer specification for :$layer",
68							"\n(did you mean: $layer=>1)\n "
69							unless $value;
70	6					29	push @layers_or_options, { $layer=>$value };
71							}
72							}
73							}
74
75							# Any remaining args are the read mode, source file, and whatever...
76	139					295	my ($mode, $source, @args) = @_;
77
78							# If no arguments, use defaults...
79	139	100				298	if (!defined $mode) {
80	6	50				20	$mode = defined $default ? $default
		100
81							: @ARGV ? \*ARGV
82							: "<"
83							}
84
85							# If mode was a reference, it must really have been the source...
86	139	100				1109	if (ref $mode) {
		100
87	53					73	$source = $mode;
88	53					89	$mode = "<";
89							}
90
91							# If mode isn't a valid mode, it must actually have been the source...
92							elsif ($mode !~ /$mode_plus_layers/x) {
93	25					75	$source = $mode;
94	25	100				193	$mode = $source =~ s/$mode_pat//x ? "$1"
		100
95							: $source =~ s/ \\| \s* $//x ? "-\|"
96							: "<"
97							;
98							}
99
100							# Sources can be references, but only certain kinds of references...
101	139					216	my $ref = ref $source;
102	139	100				288	if ($ref) {
103							croak "Can't use $ref as a data source"
104							unless $ref eq 'SCALAR'
105							\|\| $ref eq 'GLOB'
106	89	50	100			401	\|\| eval { $source->isa('IO::Handle') };
	21		66			145
107							}
108
109							# slurp() always uses \n as its input record separator (a la Perl 6)
110	139					503	local $/ = "\n";
111
112							# This track the various options slurp() allows...
113	139					262	my ($chomp, $chomp_to, $layers) = (0, "", "");
114
115							# Can this slurp be done in an optimized way (assume so initially)???
116	139					165	my $optimized = 1;
117
118							# Decode the layers and options...
119	139					176	my $IRS = "\n";
120	139					272	for (@layers_or_options) {
121							# Input record separator...
122	54	100				721	if (exists $_->{irs}) {
123	30					46	$IRS = $_->{irs};
124	30	100				79	$/ = $IRS if !ref($IRS);
125	30					56	delete $_->{irs};
126	30		66			649	$optimized &&= !ref($IRS); # ...can't be optimized if irs is a regex
127							}
128
129							# Autochomp...
130	54	100				1473	if (exists $_->{chomp}) {
131	26					30	$chomp = 1;
132
133							# If the chomp value is a string, that becomes to replacement...
134	26	100	66			87	if (defined $_->{chomp} && !is_pure_num($_->{chomp})) {
135	14					26	$chomp_to = $_->{chomp}
136							}
137	26					53	delete $_->{chomp};
138	26					31	$optimized = 0; # ...chomped slurps can't be optimized
139							}
140
141							# Any other entries are layers...
142	54					227	$layers .= join " ", map ":$_", keys %$_;
143							}
144
145							# Add any layers found to the mode specification...
146	139					262	$mode .= " $layers";
147
148							# Open the source as a filehandle...
149	139					163	my $FH;
150
151							# Source is a typeglob...
152	139	100	100			615	if ($ref && $ref ne 'SCALAR') {
		50
153	43					69	$FH = $source;
154							}
155
156							# No source, specified: use *ARGV...
157							elsif (!$source) {
158	13			13		82	no warnings 'io';
	13					25
	13					926
159	0	0				0	open $FH, '<-'
160							or croak "Can't open stdin: $!";
161							}
162
163							# Source specified: open it...
164							else {
165	13			13		65	no warnings 'io';
	13					23
	13					5758
166	3	100		3		29	open $FH, $mode, $source, @args
	3					5
	3					33
	96					31652
167							or croak "Can't open '$source': $!";
168							}
169
170							# Standardize chomp-converter sub...
171	137	50		90		5490	my $chomp_into = ref $chomp_to eq 'CODE' ? $chomp_to : sub{ $chomp_to };
	90					282
172
173							# Optimized slurp if possible in list context...
174	137	100	100			1162	if ($list_context && $optimized) {
175	36					985	return <$FH>;
176							}
177
178							# Acquire data (working around bug between $/ and in magic ARGV)...
179	101	100				629	my $data = refaddr($FH) == \*ARGV ? join("",<>) : do { local $/; <$FH> };
	100					556
	100					5289
180
181							# Prepare input record separator regex...
182	101	100				1622	my $irs = ref($IRS) ? $IRS
		100
183							: defined($IRS) ? qr{\Q$IRS\E}
184							: qr{(?!)};
185
186							# List context may require input record separator processing...
187	101	100				308	if ($list_context) {
188							# No data --> nothing to return...
189	14	50				36	return () unless defined $data;
190
191							# Split acquired data into lines according to IRS...
192	14					295	my @components = split /($irs)/, $data;
193	14					29	my @lines;
194	14					34	while (@components) {
195							# Extract the next line and separator...
196	56					109	my ($line, $sep) = splice @components, 0, 2;
197
198							# Add the line...
199	56					80	push @lines, $line;
200
201							# Chomp as requested...
202	56	100	66			229	if (defined $sep && length $sep) {
203	50	100				129	$lines[-1] .= $chomp ? $chomp_into->($sep) : $sep;
204							}
205							}
206	14					177	return @lines;
207							}
208
209							# Scalar context...
210							else {
211							# No data --> nothing to return...
212	87	100				257	return q{} unless defined $data;
213
214							# Otherwise, do any requested chomp-conversion...
215	78	100				227	if ($chomp) {
216	13					208	$data =~ s{($irs)}{$chomp_into->($1)}ge;
	45					74
217							}
218
219	78					1630	return $data;
220							}
221							}
222
223							1;
224							__END__
225
226
227							=head1 NAME
228
229							Perl6::Slurp - Implements the Perl 6 'slurp' built-in
230
231
232							=head1 SYNOPSIS
233
234							use Perl6::Slurp;
235
236							# Slurp a file by name...
237
238							$file_contents = slurp 'filename';
239							$file_contents = slurp '<filename';
240							$file_contents = slurp '<', 'filename';
241							$file_contents = slurp '+<', 'filename';
242
243
244							# Slurp a file via an (already open!) handle...
245
246							$file_contents = slurp \*STDIN;
247							$file_contents = slurp $filehandle;
248							$file_contents = slurp IO::File->new('filename');
249
250
251							# Slurp a string...
252
253							$str_contents = slurp \$string;
254							$str_contents = slurp '<', \$string;
255
256
257							# Slurp a pipe (not on Windows, alas)...
258
259							$str_contents = slurp 'tail -20 $filename \|';
260							$str_contents = slurp '-\|', 'tail', -20, $filename;
261
262
263							# Slurp with no source slurps from whatever $_ indicates...
264
265							for (@files) {
266							$contents .= slurp;
267							}
268
269							# ...or from the entire ARGV list, if $_ is undefined...
270
271							$_ = undef;
272							$ARGV_contents = slurp;
273
274
275							# Specify I/O layers as part of mode...
276
277							$file_contents = slurp '<:raw', $file;
278							$file_contents = slurp '<:utf8', $file;
279							$file_contents = slurp '<:raw :utf8', $file;
280
281
282							# Specify I/O layers as separate options...
283
284							$file_contents = slurp $file, {raw=>1};
285							$file_contents = slurp $file, {utf8=>1};
286							$file_contents = slurp $file, {raw=>1}, {utf8=>1};
287							$file_contents = slurp $file, [raw=>1, utf8=>1];
288
289
290							# Specify input record separator...
291
292							$file_contents = slurp $file, {irs=>"\n\n"};
293							$file_contents = slurp '<', $file, {irs=>"\n\n"};
294							$file_contents = slurp {irs=>"\n\n"}, $file;
295
296
297							# Input record separator can be regex...
298
299							$file_contents = slurp $file, {irs=>qr/\n+/};
300							$file_contents = slurp '<', $file, {irs=>qr/\n+\|\t{2,}};
301
302
303							# Specify autochomping...
304
305							$file_contents = slurp $file, {chomp=>1};
306							$file_contents = slurp {chomp=>1}, $file;
307							$file_contents = slurp $file, {chomp=>1, irs=>"\n\n"};
308							$file_contents = slurp $file, {chomp=>1, irs=>qr/\n+/};
309
310
311							# Specify autochomping that replaces irs
312							# with another string...
313
314							$file_contents = slurp $file, {irs=>"\n\n", chomp=>"\n"};
315							$file_contents = slurp $file, {chomp=>"\n\n"}, {irs=>qr/\n+/};
316
317
318							# Specify autochomping that replaces
319							# irs with a dynamically computed string...
320
321							my $n = 1;
322							$file_contents = slurp $file, {chomp=>sub{ "\n#line ".$n++."\n"};
323
324
325							# Slurp in a list context...
326
327							@lines = slurp 'filename';
328							@lines = slurp $filehandle;
329							@lines = slurp \$string;
330							@lines = slurp '<:utf8', 'filename', {irs=>"\x{2020}", chomp=>"\n"};
331
332
333							=head1 DESCRIPTION
334
335							C<slurp> takes:
336
337							=over
338
339							=item *
340
341							a filename,
342
343							=item *
344
345							a filehandle,
346
347							=item *
348
349							a typeglob reference,
350
351							=item *
352
353							an IO::File object, or
354
355							=item *
356
357							a scalar reference,
358
359							=back
360
361							converts it to an input stream (using C<open()> if necessary), and reads
362							in the entire stream. If C<slurp> fails to set up or read the stream, it
363							throws an exception.
364
365							If no data source is specified C<slurp> uses the value of C<$_> as the
366							source. If C<$_> is undefined, C<slurp> uses the C<@ARGV> list,
367							and magically slurps the contents of I<all> the sources listed in C<@ARGV>.
368							Note that the same magic is also applied if you explicitly slurp <*ARGV>, so
369							the following three input operations:
370
371							$contents = join "", <ARGV>;
372
373							$contents = slurp \*ARGV;
374
375							$/ = undef;
376							$contents = slurp;
377
378							are identical in effect.
379
380							In a scalar context C<slurp> returns the stream contents as a single string.
381							If the stream is at EOF, it returns an empty string.
382							In a list context, it splits the contents after the appropriate input
383							record separator and returns the resulting list of strings.
384
385							You can set the input record separator (S<< C<< { irs => $your_irs_here}
386							>> >>) for the input operation. The separator can be specified as a
387							string or a regex. Note that an explicit input record separator has no
388							input-terminating effect in a scalar context; C<slurp> always
389							reads in the entire input stream, whatever the C<'irs'> value.
390
391							In a list context, changing the separator can change how the input is
392							broken up within the list that is returned.
393
394							If an input record separator is not explicitly specified, C<slurp>
395							defaults to C<"\n"> (I<not> to the current value of C<$/> E<ndash> since
396							Perl 6 doesn't I<have> a C<$/>);
397
398							You can also tell C<slurp> to automagically C<chomp> the input as it is
399							read in, by specifying: (S<< C<< { chomp => 1 } >> >>)
400
401							Better still, you can tell C<slurp> to automagically
402							C<chomp> the input and I<replace> what it chomps with another string,
403							by specifying: (S<< C<< { chomp => "another string" } >> >>)
404
405							You can also tell C<slurp> to compute the replacement string on-the-fly
406							by specifying a subroutine as the C<chomp> value:
407							(S<< C<< { chomp => sub{...} } >> >>). This subroutine is passed the string
408							being chomped off, so for example you could squeeze single newlines to a
409							single space and multiple consecutive newlines to a two newlines with:
410
411							sub squeeze {
412							my ($removed) = @_;
413							if ($removed =~ tr/\n/\n/ == 1) { return " " }
414							else { return "\n\n"; }
415							}
416
417							print slurp(\DATA, {irs=>qr/[ \t]\n+/, chomp=>\&squeeze}), "\n";
418
419							Which would transform:
420
421							This is the
422							first paragraph
423
424
425							This is the
426							second
427							paragraph
428
429							This, the
430							third
431
432
433
434
435							This one is
436							the
437							very
438							last
439
440							to:
441
442							This is the first paragraph
443
444							This is the second paragraph
445
446							This, the third
447
448							This one is the very last
449
450
451							Autochomping works in both scalar and list contexts. In scalar contexts every
452							instance of the input record separator will be removed (or replaced) within
453							the returned string. In list context, each list item returned with its
454							terminating separator removed (or replaced).
455
456							You can specify I/O layers, either using the Perl 5 notation:
457
458							slurp "<:layer1 :layer2 :etc", $filename;
459
460							or as an array of options:
461
462							slurp $filename, [layer1=>1, layer2=>1, etc=>1];
463							slurp [layer1=>1, layer2=>1, etc=>1], $filename;
464
465							or as individual options (each of which must be in a separate hash):
466
467							slurp $filename, {layer1=>1}, {layer2=>1}, {etc=>1};
468							slurp {layer1=>1}, {layer2=>1}, {etc=>1}, $filename;
469
470							(...which, of course, would look much cooler in Perl 6:
471
472							# Perl 6 only :-(
473
474							slurp $filename, :layer1 :layer2 :etc;
475							slurp :layer1 :layer2 :etc, $filename;
476
477							)
478
479							A common mistake is to put all the options together in one hash:
480
481							slurp $filename, {layer1=>1, layer2=>1, etc=>1};
482
483							This is almost always a disaster, since the order of I/O layers is usually
484							critical, and placing them all in one hash effectively randomizes that order.
485							Use an array instead:
486
487							slurp $filename, [layer1=>1, layer2=>1, etc=>1];
488
489
490							=head1 WARNINGS
491
492							The syntax and semantics of Perl 6 is still being finalized
493							and consequently is at any time subject to change. That means the
494							same caveat applies to this module.
495
496							When called with a filename or piped shell command, C<slurp()> uses
497							Perl's built- in C<open()> to access the file. This means that it
498							is subject to the same platform-specific limitations as C<open()>.
499							For example, slurping from piped shell commands may not work
500							under Windows.
501
502
503							=head1 DEPENDENCIES
504
505							Requires: Perl 5.8.0
506
507
508							=head1 AUTHOR
509
510							Damian Conway (damian@conway.org)
511
512
513							=head1 COPYRIGHT
514
515							Copyright (c) 2003-2012, Damian Conway. All Rights Reserved.
516							This module is free software. It may be used, redistributed
517							and/or modified under the same terms as Perl itself.