File Coverage

blib/lib/PPIx/QuoteLike.pm

Criterion	Covered	Total	%
statement	390	429	90.9
branch	152	222	68.4
condition	43	67	64.1
subroutine	63	68	92.6
pod	24	24	100.0
total	672	810	82.9

line	stmt	bran	cond	sub	pod	time	code
1							package PPIx::QuoteLike;
2
3	7			7		211762	use 5.006;
	7					46
4
5	7			7		33	use strict;
	7					12
	7					125
6	7			7		28	use warnings;
	7					10
	7					188
7
8	7			7		32	use Carp;
	7					9
	7					408
9	7			7		3328	use Encode ();
	7					62633
	7					145
10	7			7		39	use List::Util ();
	7					12
	7					155
11	7					916	use PPIx::QuoteLike::Constant qw{
12							ARRAY_REF
13							LOCATION_LINE
14							LOCATION_CHARACTER
15							LOCATION_COLUMN
16							LOCATION_LOGICAL_LINE
17							LOCATION_LOGICAL_FILE
18							MINIMUM_PERL
19							VARIABLE_RE
20							@CARP_NOT
21	7			7		1363	};
	7					12
22	7			7		2034	use PPIx::QuoteLike::Token::Control;
	7					15
	7					184
23	7			7		2193	use PPIx::QuoteLike::Token::Delimiter;
	7					16
	7					201
24	7			7		2057	use PPIx::QuoteLike::Token::Interpolation;
	7					19
	7					202
25	7			7		2057	use PPIx::QuoteLike::Token::String;
	7					15
	7					164
26	7			7		38	use PPIx::QuoteLike::Token::Structure;
	7					10
	7					130
27	7			7		1950	use PPIx::QuoteLike::Token::Unknown;
	7					16
	7					149
28	7			7		1877	use PPIx::QuoteLike::Token::Whitespace;
	7					13
	7					217
29	7					410	use PPIx::QuoteLike::Utils qw{
30							column_number
31							line_number
32							logical_filename
33							logical_line_number
34							statement
35							visual_column_number
36							__instance
37							__match_enclosed
38							__matching_delimiter
39	7			7		41	};
	7					11
40	7			7		32	use Scalar::Util ();
	7					12
	7					183
41
42							our $VERSION = '0.022_01';
43
44	7			7		33	use constant CLASS_CONTROL => 'PPIx::QuoteLike::Token::Control';
	7					11
	7					329
45	7			7		35	use constant CLASS_DELIMITER => 'PPIx::QuoteLike::Token::Delimiter';
	7					10
	7					265
46	7			7		32	use constant CLASS_INTERPOLATION => 'PPIx::QuoteLike::Token::Interpolation';
	7					11
	7					266
47	7			7		30	use constant CLASS_STRING => 'PPIx::QuoteLike::Token::String';
	7					11
	7					260
48	7			7		35	use constant CLASS_STRUCTURE => 'PPIx::QuoteLike::Token::Structure';
	7					9
	7					246
49	7			7		33	use constant CLASS_UNKNOWN => 'PPIx::QuoteLike::Token::Unknown';
	7					9
	7					274
50	7			7		32	use constant CLASS_WHITESPACE => 'PPIx::QuoteLike::Token::Whitespace';
	7					11
	7					355
51
52	7			7		34	use constant CODE_REF => ref sub {};
	7					11
	7					310
53
54	7					232	use constant ILLEGAL_FIRST =>
55	7			7		35	'Tokenizer found illegal first characters';
	7					12
56	7					340	use constant MISMATCHED_DELIM =>
57	7			7		29	'Tokenizer found mismatched delimiters';
	7					15
58	7					23262	use constant NO_INDENTATION =>
59	7			7		39	'No indentation string found';
	7					13
60
61							{
62							my $match_sq = __match_enclosed( qw< ' > );
63							my $match_dq = __match_enclosed( qw< " > );
64							my $match_bt = __match_enclosed( qw< ` > );
65
66							sub new { ## no critic (RequireArgUnpacking)
67	114			114	1	28354	my ( $class, $source, %arg ) = @_;
68
69	114					176	my @children;
70
71	114	100				271	if ( $arg{location} ) {
72							ARRAY_REF eq ref $arg{location}
73	2	50				11	or croak q;
74	2					8	foreach my $inx ( 0 .. 3 ) {
75	8	50				23	$arg{location}[$inx] =~ m/ [^0-9] /smx
76							and croak "Argument 'location' element $inx must be an unsigned integer";
77							}
78							}
79
80	114	50				240	if ( ! defined $arg{index_locations} ) {
81							$arg{index_locations} = !! $arg{location} \|\|
82	114		100			425	__instance( $source, 'PPI::Element' );
83							}
84
85							my $self = {
86							index_locations => $arg{index_locations},
87							children => \@children,
88							encoding => $arg{encoding},
89							failures => 0,
90							location => $arg{location},
91	114					500	source => $source,
92							};
93
94	114		33			365	bless $self, ref $class \|\| $class;
95
96	114	100				271	defined( my $string = $self->_stringify_source( $source ) )
97							or return;
98
99	62					120	my ( $type, $gap, $gap2, $content, $end_delim, $indented, $start_delim );
100
101							$arg{trace}
102	62	50				231	and warn "Initial match of $string\n";
103
104							# q<>, qq<>, qx<>
105	62	100				1060	if ( $string =~ m/ \A \s* ( q [qx]? ) ( \s* ) ( . ) /smxgc ) {
		100
		50
106	9					38	( $type, $gap, $start_delim ) = ( $1, $2, $3 );
107	9	50	66			62	not $gap
108							and $start_delim =~ m< \A \w \z >smx
109							and return $self->_link_elems( $self->_make_token(
110							CLASS_UNKNOWN, $string, error => ILLEGAL_FIRST ) );
111							$arg{trace}
112	9	50				32	and warn "Initial match '$type$start_delim'\n";
113	9		100			50	$self->{interpolates} = 'qq' eq $type \|\|
114							'qx' eq $type && q<'> ne $start_delim;
115	9		50			34	$content = substr $string, ( pos $string \|\| 0 );
116	9					32	$end_delim = __matching_delimiter( $start_delim );
117	9	50				39	if ( $end_delim eq substr $content, -1 ) {
118	9					24	chop $content;
119							} else {
120	0					0	$end_delim = '';
121							}
122
123							# here doc
124							# Note that the regexp used here is slightly wrong in that white
125							# space between the '<<' and the termination string is not
126							# allowed if the termination string is not quoted in some way.
127							} elsif ( $string =~ m/ \A \s* ( << ) ( \s* ) ( ~? ) ( \s* )
128							( [\\]? \w+ \| $match_sq \| $match_dq \| $match_bt ) \n /smxgc ) {
129	7					40	( $type, $gap, $indented, $gap2, $start_delim ) = (
130							$1, $2, $3, $4, $5 );
131							$arg{trace}
132	7	50				25	and warn "Initial match '$type$start_delim$gap$indented'\n";
133	7					147	$self->{interpolates} = $start_delim !~ m/ \A [\\'] /smx;
134	7		50			31	$content = substr $string, ( pos $string \|\| 0 );
135	7					23	$end_delim = _unquote( $start_delim );
136							# NOTE that the indentation is specifically space or tab
137							# only.
138	7	50				137	if ( $content =~ s/ ^ ( [ \t]* ) \Q$end_delim\E \n? \z //smx ) {
139							# NOTE PPI::Token::HereDoc does not preserve the
140							# indentation of an indented here document, so the
141							# indentation will appear to be '' if we came from PPI.
142	7	100				22	if ( $indented ) {
143							# Version per perldelta.pod for that release.
144	4					13	$self->{perl_version_introduced} = '5.025007';
145	4					14	$self->{indentation} = "$1";
146	4					44	$self->{_indentation_re} = qr/
147							^ \Q$self->{indentation}\E /smx;
148							}
149							} else {
150	0					0	$end_delim = '';
151							}
152							$self->{start} = [
153	7					31	$self->_make_token( CLASS_DELIMITER, $start_delim ),
154							$self->_make_token( CLASS_WHITESPACE, "\n" ),
155							];
156
157							# Don't instantiate yet -- we'll do them at the end.
158							$self->{finish} = [
159	7					27	[ CLASS_DELIMITER, $end_delim ],
160							[ CLASS_WHITESPACE, "\n" ],
161							];
162
163							# ``, '', "", <>
164							} elsif ( $string =~ m/ \A \s* ( [`'"<] ) /smxgc ) {
165	46					167	( $type, $gap, $start_delim ) = ( '', '', $1 );
166							$arg{trace}
167	46	50				114	and warn "Initial match '$type$start_delim'\n";
168	46					145	$self->{interpolates} = q<'> ne $start_delim;
169	46		50			145	$content = substr $string, ( pos $string \|\| 0 );
170	46					163	$end_delim = __matching_delimiter( $start_delim );
171	46	50				118	if ( $end_delim eq substr $content, -1 ) {
172	46					105	chop $content;
173							} else {
174	0					0	$end_delim = '';
175							}
176
177							# Something we do not recognize
178							} else {
179							$arg{trace}
180	0	0				0	and warn "No initial match\n";
181	0					0	return $self->_link_elems( $self->_make_token(
182							CLASS_UNKNOWN, $string, error => ILLEGAL_FIRST ) );
183							}
184
185	62	100				178	$self->{interpolates} = $self->{interpolates} ? 1 : 0;
186
187	62		100			240	defined or $_ = '' for $indented, $gap2;
188							$self->{type} = [
189	62	100				180	$self->_make_token( CLASS_STRUCTURE, $type ),
		100
		50
190							length $gap ?
191							$self->_make_token( CLASS_WHITESPACE, $gap ) :
192							(),
193							length $indented ?
194							$self->_make_token( CLASS_STRUCTURE, $indented ) :
195							(),
196							length $gap2 ?
197							$self->_make_token( CLASS_WHITESPACE, $gap2 ) :
198							(),
199							];
200							$self->{start} \|\|= [
201	62		100			288	$self->_make_token( CLASS_DELIMITER, $start_delim ),
202							];
203
204							$arg{trace}
205	62	50				143	and warn "Without delimiters: '$content'\n";
206
207							# We accumulate data and manufacure tokens at the end to reduce
208							# the overhead involved in merging strings.
209	62	100				128	if ( $self->{interpolates} ) {
210	49					126	push @children, [ '' => '' ]; # Prime the pump
211	49					83	while ( 1 ) {
212
213	148	100				683	if ( $content =~ m/ \G ( \\ [ULulQEF] ) /smxgc ) {
		100
		100
		100
214	4					15	push @children, [ CLASS_CONTROL, "$1" ];
215							} elsif (
216							$content =~ m/ \G ( \\ N [{] ( [^}]+ ) [}] ) /smxgc
217							) {
218							# Handle \N{...} separately because it can not
219							# contain an interpolation even inside of an
220							# otherwise-interpolating string. That is to say,
221							# "\N{$foo}" is simply invalid, and does not even
222							# try to interpolate $foo. {
223							# TODO use $re = __match_enclosed( '{' ); # }
224	1					4	my ( $seq, $name ) = ( $1, $2 );
225							# TODO The Regexp is certainly too permissive. For
226							# the moment all I am doing is disallowing
227							# interpolation.
228	1	50				8	push @children, $name =~ m/ [\$\@] /smx ?
229							[ CLASS_UNKNOWN, $seq,
230							error => "Unknown charname '$name'" ] :
231							[ CLASS_STRING, $seq ];
232							# NOTE in the following that I do not read perldata as
233							# saying there can be space between the sigil and the
234							# variable name, but Perl itself seems to accept it as
235							# of 5.30.1.
236							} elsif ( $content =~ m/ \G ( [\$\@] \#? \$* ) /smxgc ) {
237	51					200	push @children, $self->_interpolation( "$1", $content );
238							} elsif ( $content =~ m/ \G ( \\ . \| [^\\\$\@]+ ) /smxgc ) {
239	43					167	push @children, $self->_remove_here_doc_indentation(
240							"$1",
241							sibling => \@children,
242							);
243							} else {
244	49					100	last;
245							}
246							}
247
248	49					128	@children = _merge_strings( @children );
249	49					72	shift @children; # remove the priming
250
251							# Make the tokens, at long last.
252	49					103	foreach ( @children ) {
253	96					107	$_ = $self->_make_token( @{ $_ } );
	96					179
254							}
255
256							} else {
257
258							length $content
259	13	100				88	and push @children, map { $self->_make_token( @{ $_ } ) }
	15					20
	15					32
260							_merge_strings(
261							$self->_remove_here_doc_indentation( $content )
262							);
263
264							}
265
266							# Add the indentation before the end marker, if needed
267							$self->{indentation}
268							and push @children, $self->_make_token(
269	62	100				179	CLASS_WHITESPACE, $self->{indentation} );
270
271	62	100				150	if ( $self->{finish} ) {
272							# If we already have something here it is data, not objects.
273	7					15	foreach ( @{ $self->{finish} } ) {
	7					16
274	14					17	$_ = $self->_make_token( @{ $_ } );
	14					27
275							}
276							} else {
277							$self->{finish} = [
278	55					135	$self->_make_token( CLASS_DELIMITER, $end_delim ),
279							];
280							}
281
282	62	100				209	ref $_[1]
283							and pos( $_[1] ) = pos $string;
284
285	62					201	return $self->_link_elems();
286							}
287							}
288
289							sub child {
290	46			46	1	9502	my ( $self, $number ) = @_;
291	46					135	return $self->{children}[$number];
292							}
293
294							sub children {
295	116			116	1	212	my ( $self ) = @_;
296	116					182	return @{ $self->{children} };
	116					349
297							}
298
299							sub content {
300	27			27	1	74	my ( $self ) = @_;
301	27					109	return join '', map { $_->content() } grep { $_ } $self->elements();
	142					336
	142					204
302							}
303
304							sub delimiters {
305	27			27	1	72	my ( $self ) = @_;
306	54					214	return join '', grep { defined }
307	27					84	map { $self->_get_value_scalar( $_ ) }
	54					118
308							qw{ start finish };
309							}
310
311							# $self->_deprecation_notice( $type, $name );
312							#
313							# This method centralizes deprecation. Type is 'attribute' or
314							# 'method'. Deprecation is driven of the %deprecate hash. Values
315							# are:
316							# false - no warning
317							# 1 - warn on first use
318							# 2 - warn on each use
319							# 3 - die on each use.
320							#
321							# $self->_deprecation_in_progress( $type, $name )
322							#
323							# This method returns true if the deprecation is in progress. In
324							# fact it returns the deprecation level.
325
326							=begin comment
327
328							# Abandoned in place, against future need.
329
330							{
331
332							my %deprecate = (
333							attribute => {
334							postderef => 3,
335							},
336							);
337
338							sub _deprecation_notice {
339							my ( undef, $type, $name, $repl ) = @_; # Invocant unused
340							$deprecate{$type} or return;
341							$deprecate{$type}{$name} or return;
342							my $msg = sprintf 'The %s %s is %s', $name, $type,
343							$deprecate{$type}{$name} > 2 ? 'removed' : 'deprecated';
344							defined $repl
345							and $msg .= "; use $repl instead";
346							$deprecate{$type}{$name} >= 3
347							and croak $msg;
348							warnings::enabled( 'deprecated' )
349							and carp $msg;
350							$deprecate{$type}{$name} == 1
351							and $deprecate{$type}{$name} = 0;
352							return;
353							}
354
355							}
356
357							=end comment
358
359							=cut
360
361							sub _get_value_scalar {
362	54			54		96	my ( $self, $method ) = @_;
363	54	50				126	defined( my $val = $self->$method() )
364							or return;
365	54	50				183	return ref $val ? $val->content() : $val;
366							}
367
368							sub elements {
369	84			84	1	3063	my ( $self ) = @_;
370	84					146	return @{ $self->{elements} \|\|= [
371	84		100			427	map { $self->$_() } qw{ type start children finish }
	192					466
372							] };
373							}
374
375							sub encoding {
376	34			34	1	102	my ( $self ) = @_;
377	34					128	return $self->{encoding};
378							}
379
380							sub failures {
381	36			36	1	8571	my ( $self ) = @_;
382	36					148	return $self->{failures};
383							}
384
385							sub find {
386	1			1	1	1061	my ( $self, $target ) = @_;
387
388							my $check = CODE_REF eq ref $target ? $target :
389							ref $target ? croak 'find() target may not be ' . ref $target :
390	1	50		5		8	sub { $_[0]->isa( $target ) };
	5	50				25
391	1					3	my @found;
392	1					3	foreach my $elem ( $self, $self->elements() ) {
393	5	100				7	$check->( $elem )
394							and push @found, $elem;
395							}
396
397							@found
398	1	50				4	or return 0;
399
400	1					5	return \@found;
401							}
402
403							sub finish {
404	109			109	1	196	my ( $self, $inx ) = @_;
405							$self->{finish}
406	109	50				234	or return;
407							wantarray
408	109	100				193	and return @{ $self->{finish} };
	55					251
409	54		50			215	return $self->{finish}[ $inx \|\| 0 ];
410							}
411
412							sub handles {
413	0			0	1	0	my ( $self, $string ) = @_;
414	0					0	return $self->_stringify_source( $string, test => 1 );
415							}
416
417							sub indentation {
418	10			10	1	18	my ( $self ) = @_;
419	10					33	return $self->{indentation};
420							}
421
422							sub interpolates {
423	84			84	1	163	my ( $self ) = @_;
424	84					342	return $self->{interpolates};
425							}
426
427							sub location {
428	6			6	1	7	my ( $self ) = @_;
429	6					13	return $self->type()->location();
430							}
431
432							sub _make_token {
433	326			326		607	my ( $self, $class, $content, %arg ) = @_;
434	326					1142	my $token = $class->__new( content => $content, %arg );
435							CLASS_UNKNOWN eq $class
436	326	100				567	and $self->{failures}++;
437							$self->{index_locations}
438	326	100				658	and $self->_update_location( $token );
439	326					955	return $token;
440							}
441
442							sub _update_location {
443	100			100		132	my ( $self, $token ) = @_;
444							$token->{location} # Idempotent
445	100	50				194	and return;
446	100		66			211	my $loc = $self->{_location} \|\|= do {
447							my %loc = (
448							line_content => '',
449							location => $self->{location},
450	19					64	);
451	19	100				61	if ( __instance( $self->{source}, 'PPI::Element' ) ) {
452	17		33			128	$loc{location} \|\|= $self->{source}->location();
453	17	50				3195	if ( my $doc = $self->{source}->document() ) {
454	17					292	$loc{tab_width} = $doc->tab_width();
455							}
456							}
457	19		100			115	$loc{tab_width} \|\|= 1;
458	19					49	\%loc;
459							};
460							$loc->{location}
461	100	50				166	or return;
462	100					117	$token->{location} = [ @{ $loc->{location} } ];
	100					186
463
464	100	50				243	if ( defined( my $content = $token->content() ) ) {
465
466	100					111	my $lines;
467	100					193	pos( $content ) = 0;
468	100					243	$lines++ while $content =~ m/ \n /smxgc;
469	100	100				149	if ( pos $content ) {
470	4					36	$loc->{location}[LOCATION_LINE] += $lines;
471	4					7	$loc->{location}[LOCATION_LOGICAL_LINE] += $lines;
472							$loc->{location}[LOCATION_CHARACTER] =
473	4					6	$loc->{location}[LOCATION_COLUMN] = 1;
474							}
475
476	100	100				214	if ( my $chars = length( $content ) - pos( $content ) ) {
477	83					105	$loc->{location}[LOCATION_CHARACTER] += $chars;
478	83	100	100			208	if ( $loc->{tab_width} > 1 && $content =~ m/ \t /smx ) {
479	4					6	my $pos = $loc->{location}[LOCATION_COLUMN];
480	4					6	my $tab_width = $loc->{tab_width};
481							# Stolen shamelessly from PPI::Document::_visual_length
482	4					4	my ( $vis_inc );
483	4					12	foreach my $part ( split /(\t)/, $content ) {
484	10	100				13	if ($part eq "\t") {
485	5					7	$vis_inc = $tab_width - ($pos-1) % $tab_width;
486							} else {
487	5					5	$vis_inc = length $part;
488							}
489	10					12	$pos += $vis_inc;
490							}
491	4					7	$loc->{location}[LOCATION_COLUMN] = $pos;
492							} else {
493	79					112	$loc->{location}[LOCATION_COLUMN] += $chars;
494							}
495							}
496
497							}
498
499	100					157	return;
500							}
501
502							sub parent {
503	1			1	1	3	return;
504							}
505
506							sub perl_version_introduced {
507	18			18	1	48	my ( $self ) = @_;
508	133					235	return List::Util::max( grep { defined $_ } MINIMUM_PERL,
509							$self->{perl_version_introduced},
510	18					38	map { $_->perl_version_introduced() } $self->elements() );
	97					181
511							}
512
513							sub perl_version_removed {
514	9			9	1	15	my ( $self ) = @_;
515	9					10	my $max;
516	9					17	foreach my $elem ( $self->elements() ) {
517	58	50				110	if ( defined ( my $ver = $elem->perl_version_removed() ) ) {
518	0	0				0	if ( defined $max ) {
519	0	0				0	$ver < $max and $max = $ver;
520							} else {
521	0					0	$max = $ver;
522							}
523							}
524							}
525	9					21	return $max;
526							}
527
528							sub schild {
529	0			0	1	0	my ( $self, $inx ) = @_;
530	0		0			0	$inx \|\|= 0;
531	0					0	my @kids = $self->schildren();
532	0					0	return $kids[$inx];
533							}
534
535							sub schildren {
536	0			0	1	0	my ( $self ) = @_;
537							return (
538	0					0	grep { $_->significant() } $self->children()
	0					0
539							);
540							}
541
542							sub source {
543	10			10	1	15	my ( $self ) = @_;
544	10					23	return $self->{source};
545							}
546
547							sub start {
548	109			109	1	197	my ( $self, $inx ) = @_;
549							$self->{start}
550	109	50				224	or return;
551							wantarray
552	109	100				222	and return @{ $self->{start} };
	55					101
553	54		50			257	return $self->{start}[ $inx \|\| 0 ];
554							}
555
556							sub top {
557	2			2	1	3	my ( $self ) = @_;
558	2					5	return $self;
559							}
560
561							sub type {
562	88			88	1	163	my ( $self, $inx ) = @_;
563							$self->{type}
564	88	50				212	or return;
565							wantarray
566	88	100				212	and return @{ $self->{type} };
	55					141
567	33		50			204	return $self->{type}[ $inx \|\| 0 ];
568							}
569
570							sub variables {
571	40			40	1	86	my ( $self ) = @_;
572
573	40	100				106	$self->interpolates()
574							or return;
575
576	34					63	my %var;
577	34					95	foreach my $kid ( $self->children() ) {
578	67					286	foreach my $sym ( $kid->variables() ) {
579	35					104	$var{$sym} = 1;
580							}
581							}
582	34					353	return ( keys %var );
583							}
584
585							sub _chop {
586	0			0		0	my ( $middle ) = @_;
587	0					0	my $left = substr $middle, 0, 1, '';
588	0					0	my $right = substr $middle, -1, 1, '';
589	0					0	return ( $left, $middle, $right );
590							}
591
592							# decode data using the object's {encoding}
593							# It is anticipated that if I make PPIx::Regexp depend on this package,
594							# that this will be called there.
595
596							sub __decode {
597	23			23		91	my ( $self, $data, $encoding ) = @_;
598	23		33			150	$encoding \|\|= $self->{encoding};
599	23	50	33			161	defined $encoding
600							and _encode_available()
601							or return $data;
602	0					0	return Encode::decode( $encoding, $data );
603							}
604
605							{
606
607							my $encode_available;
608
609							sub _encode_available {
610	0	0		0		0	defined $encode_available and return $encode_available;
611	0	0				0	return ( $encode_available = eval {
612	0					0	require Encode;
613	0					0	1;
614							} ? 1 : 0
615							);
616							}
617
618							}
619
620							{
621							my ( $cached_doc, $cached_encoding );
622
623							# These are the byte order marks documented as being recognized by
624							# PPI. Only utf-8 is documented as supported.
625							my %known_bom = (
626							'EFBBBF' => 'utf-8',
627							'0000FEFF' => 'utf-32be',
628							'FFFE0000' => 'utf-32le',
629							'FEFF' => 'utf-16be',
630							'FFFE' => 'utf-16le',
631							);
632
633							sub _get_ppi_encoding {
634	17			17		28	my ( $elem ) = @_;
635
636	17	50				93	my $doc = $elem->top()
637							or return;
638
639	17	100	66			254	$cached_doc
640							and $doc == $cached_doc
641							and return $cached_encoding;
642
643	11	50				46	my $bom = $doc->first_element()
644							or return;
645
646	11					67	Scalar::Util::weaken( $cached_doc = $doc );
647
648	11	50				55	if ( $bom->isa( 'PPI::Token::BOM' ) ) {
649							return ( $cached_encoding = $known_bom{
650	0					0	uc unpack 'H*', $bom->content() } );
651							}
652
653	11					17	$cached_encoding = undef;
654
655	11					18	foreach my $use (
656	11	100				25	@{ $doc->find( 'PPI::Statement::Include' ) \|\| [] }
657							) {
658	2	50				1340	'use' eq $use->type()
659							or next;
660	2	50				44	defined( my $module = $use->module() )
661							or next;
662	2	50				43	'utf8' eq $module
663							or next;
664	0					0	$cached_encoding = 'utf-8';
665	0					0	last;
666							}
667
668	11					1857	return $cached_encoding;
669
670							}
671
672							}
673
674							# This subroutine was created in an attempt to simplify control flow.
675							# Argument 2 (from 0) is not unpacked because the caller needs to see
676							# the side effects of matches made on it.
677
678							{
679
680							my %special = (
681							'$$' => sub { # Process ID.
682							my ( undef, $sigil ) = @_;
683							return [ CLASS_INTERPOLATION, $sigil ];
684							},
685							'$' => sub { # Called if we find (e.g.) '$@'
686							my ( undef, $sigil ) = @_;
687							$_[2] =~ m/ \G ( [\@] ) /smxgc
688							or return;
689							return [ CLASS_INTERPOLATION, "$sigil$1" ];
690							},
691							'@' => sub { # Called if we find '@@'.
692							my ( undef, $sigil ) = @_;
693							return [ CLASS_STRING, $sigil ];
694							},
695							);
696
697							sub _interpolation { ## no critic (RequireArgUnpacking)
698	51			51		120	my ( $self, $sigil ) = @_;
699							# Argument $_[2] is $content, but we can't unpack it because we
700							# need the caller to see any changes to pos().
701
702	51	100				162	if ( $_[2] =~ m/ \G (?= \{ ) /smxgc ) {
703							# variable name enclosed in {}
704	14					48	my $delim_re = __match_enclosed( qw< { > );
705	14	50				782	if ( $_[2] =~ m/ \G $delim_re /smxgc ) {
706	14					37	my $rest = $1;
707	14	100				83	$rest =~ m/ \A \{ \s* \[ ( .* ) \] \s* \} \z /smx
708							or return [ CLASS_INTERPOLATION, "$sigil$rest" ];
709							# At this point we have @{[ ... ]}.
710	5					9	my @arg;
711	5	100				17	_has_postderef( "$1" )
712							and push @arg, postderef => 1;
713	5					286	return [ CLASS_INTERPOLATION, "$sigil$rest", @arg ];
714							}
715	0	0				0	$_[2] =~ m/ \G ( .* ) /smxgc
716							and return [ CLASS_UNKNOWN, "$sigil$1",
717							error => MISMATCHED_DELIM ];
718	0					0	confess 'Failed to match /./';
719							}
720
721	37	100				128	if ( $_[2] =~ m< \G ( @{[ VARIABLE_RE ]} ) >smxgco
	5					605
722							) {
723							# variable name not enclosed in {}
724	34					92	my $interp = "$sigil$1";
725	34					130	while ( $_[2] =~ m/ \G ( (?: -> )? ) (?= ( [[{] ) ) /smxgc ) { # }]
726	2					5	my $lead_in = $1;
727	2					21	my $delim_re = __match_enclosed( $2 );
728	2	50				132	if ( $_[2] =~ m/ \G ( $delim_re ) /smxgc ) {
729	2					12	$interp .= "$lead_in$1";
730							} else {
731	0					0	$_[2] =~ m/ ( .* ) /smxgc;
732							return (
733	0					0	[ CLASS_INTERPOLATION, $interp ],
734							[ CLASS_UNKNOWN, "$1", error => MISMATCHED_DELIM ],
735							);
736							}
737							}
738
739	34					41	my @arg;
740
741	34	100				89	if ( defined( my $deref = _match_postderef( $_[2] ) ) ) {
742	6					11	$interp .= $deref;
743	6					20	push @arg, postderef => 1;
744							}
745
746	34					131	return [ CLASS_INTERPOLATION, $interp, @arg ];
747							}
748
749	3					7	my $code;
750	3	50	33			18	$code = $special{$sigil}
751							and my $elem = $code->( $self, $sigil, $_[2] )
752							or return [ CLASS_UNKNOWN, $sigil,
753							error => 'Sigil without interpolation' ];
754
755	3					8	return $elem;
756							}
757
758							}
759
760							sub _link_elems {
761	62			62		117	my ( $self, @arg ) = @_;
762
763	62					85	push @{ $self->{children} }, @arg;
	62					119
764
765	62					121	foreach my $key ( qw{ type start children finish } ) {
766	248					302	my $prev;
767	248					247	foreach my $elem ( @{ $self->{$key} } ) {
	248					388
768	326					780	Scalar::Util::weaken( $elem->{parent} = $self );
769	326	100				480	if ( $prev ) {
770	81					201	Scalar::Util::weaken( $elem->{previous_sibling} = $prev );
771	81					168	Scalar::Util::weaken( $prev->{next_sibling} = $elem );
772							}
773	326					457	$prev = $elem;
774							}
775							}
776
777	62					593	return $self;
778							}
779
780							{
781							my %allow_subscr = map { $_ => 1 } qw{ % @ };
782
783							# Match a postfix deref at the current position in the argument. If
784							# a match occurs it is returned, and the current position is
785							# updated. If not, nothing is returned, and the current position in
786							# the argument remains unchanged.
787							# This would all be much easier if I could count on Perl 5.10
788							sub _match_postderef { ## no critic (RequireArgUnpacking)
789	34			34		50	my $pos = pos $_[0];
790							# Only scalars and arrays interpolate
791	34	100				103	$_[0] =~ m/ \G ( -> ) ( \$ \# \| [\$\@] ) /smxgc
792							or return;
793	6					17	my $match = "$1$2";
794	6					13	my $sigil = $2;
795	6	100				27	$_[0] =~ m/ \G ( [*] ) /smxgc
796							and return "$match$1";
797
798	1	50	33			10	if (
799							$allow_subscr{$sigil} &&
800							$_[0] =~ m/ \G (?= ( [[{] ) ) /smxgc # }]
801							) {
802	1					5	my $re = __match_enclosed( "$1" );
803	1	50				72	$_[0] =~ m/ \G $re /smxgc
804							and return "$match$1";
805							}
806
807	0					0	pos $_[0] = $pos;
808	0					0	return;
809							}
810							}
811
812							{
813	7			7		56	no warnings qw{ qw }; ## no critic (ProhibitNoWarnings)
	7					12
	7					5493
814							my %is_postderef = map { $_ => 1 } qw{ $ $# @ % & * $* $#* @* %* &* ** };
815							sub _has_postderef {
816	5			5		11	my ( $string ) = @_;
817	5					28	my $doc = PPI::Document->new( \$string );
818	5	100				4958	foreach my $elem ( @{ $doc->find( 'PPI::Token::Symbol' ) \|\| [] } ) {
	5					18
819	3	50				655	my $next = $elem->snext_sibling()
820							or next;
821	3	50				66	$next->isa( 'PPI::Token::Operator' )
822							or next;
823	3	50				6	$next->content() eq '->'
824							or next;
825	3	50				16	$next = $next->snext_sibling()
826							or next;
827	3	50				49	$next->isa( 'PPI::Token::Cast' )
828							or next;
829	3					7	my $content = $next->content();
830	3	50				11	$is_postderef{$content}
831							or next;
832	3	50				18	$content =~ m/ \* \z /smx
833							and return 1;
834	0	0				0	$next = $next->snext_sibling()
835							or next;
836	0	0				0	$next->isa( 'PPI::Structure::Subscript' )
837							and return 1;
838							}
839	2					674	return 0;
840							}
841							}
842
843							# For various reasons we may get consecutive literals -- typically
844							# strings. We want to merge these. The arguments are array refs, with
845							# the class name of the token in [0] and the content in [1]. I know of
846							# no way we can generate consecutive white space tokens, but if I did I
847							# would want them merged.
848							#
849							# NOTE that merger loses all attributes of the second token, so we MUST
850							# NOT merge CLASS_UNKNOWN tokens, or any class that might have
851							# attributes other than content.
852							{
853							my %can_merge = map { $_ => 1 } CLASS_STRING, CLASS_WHITESPACE;
854
855							sub _merge_strings {
856	59			59		126	my @arg = @_;
857	59					74	my @rslt;
858	59					118	foreach my $elem ( @arg ) {
859	165	100	100			597	if ( @rslt && $can_merge{$elem->[0]}
			100
860							&& $elem->[0] eq $rslt[-1][0]
861							) {
862	5					13	$rslt[-1][1] .= $elem->[1];
863							} else {
864	160					235	push @rslt, $elem;
865							}
866							}
867	59					152	return @rslt;
868							}
869							}
870
871							# If we're processing an indented here document, strings must be split
872							# on new lines and un-indented. We return array refs rather than
873							# objects because we may be called before we're ready to build the
874							# objects.
875							sub _remove_here_doc_indentation {
876	53			53		143	my ( $self, $string, %arg ) = @_;
877
878							# NOTE that we rely on the fact that both undef (not indented) and
879							# '' (indented by zero characters) evaluate false.
880							$self->{indentation}
881	53	100				219	or return [ CLASS_STRING, $string ];
882
883	5					5	my $ignore_first;
884	5	100				14	if ( $arg{sibling} ) {
885							# Because the calling code primes the pump, @sibling will never
886							# be empty, even when processing the first token. So:
887							# * The pump-priming specifies class '', so if that is what we
888							# see we must process the first line; otherwise
889							# * If the previous token is a string ending in "\n", we must
890							# process the first line.
891							$ignore_first = '' ne $arg{sibling}[-1][0] && (
892							CLASS_STRING ne $arg{sibling}[-1][0] \|\|
893	2		66			12	$arg{sibling}[-1][1] !~ m/ \n \z /smx );
894							} else {
895							# Without @sibling, we unconditionally process the first line.
896	3					7	$ignore_first = 0;
897							}
898
899	5					9	my @rslt;
900
901	5					43	foreach ( split qr/ (?<= \n ) /smx, $string ) {
902	7	100				15	if ( $ignore_first ) {
903	1					6	push @rslt, [ CLASS_STRING, "$_" ];
904	1					2	$ignore_first = 0;
905							} else {
906	6	100				86	if ( "\n" eq $_ ) {
		50
907	1					3	push @rslt,
908							[ CLASS_STRING, "$_" ],
909							;
910							} elsif ( s/ ( $self->{_indentation_re} ) //smx ) {
911	5					46	push @rslt,
912							[ CLASS_WHITESPACE, "$1" ],
913							[ CLASS_STRING, "$_" ],
914							;
915							} else {
916	0					0	push @rslt,
917							[ CLASS_UNKNOWN, "$_", error => NO_INDENTATION ],
918							;
919							}
920							}
921							}
922
923	5					20	return @rslt;
924							}
925
926							sub _stringify_source {
927	114			114		223	my ( $self, $string, %opt ) = @_;
928
929	114	100				280	if ( Scalar::Util::blessed( $string ) ) {
930
931	69	50				147	$string->isa( 'PPI::Element' )
932							or return;
933
934	69					132	foreach my $class ( qw{
935							PPI::Token::Quote
936							PPI::Token::QuoteLike::Backtick
937							PPI::Token::QuoteLike::Command
938							PPI::Token::QuoteLike::Readline
939							} ) {
940	228	100				486	$string->isa( $class )
941							or next;
942							$opt{test}
943	16	50				50	and return 1;
944
945	16					51	my $encoding = _get_ppi_encoding( $string );
946	16					74	return $self->__decode( $string->content(), $encoding );
947							}
948
949	53	100				103	if ( $string->isa( 'PPI::Token::HereDoc' ) ) {
950							$opt{test}
951	1	50				3	and return 1;
952
953	1					2	my $encoding = _get_ppi_encoding( $string );
954							my $heredoc = join '',
955	1					8	map { $self->__decode( $_, $encoding) }
	5					21
956							$string->heredoc();
957	1					5	my $terminator = $self->__decode( $string->terminator(),
958							$encoding );
959	1					4	$terminator =~ s/ (?<= \n ) \z /\n/smx;
960	1					5	return $self->__decode( $string->content(), $encoding ) .
961							"\n" . $heredoc . $terminator;
962							}
963
964	52					280	return;
965
966							}
967
968	45	50				140	ref $string
969							and return;
970
971							$string =~ m/ \A \s* (?: q [qx]? \| << \| [`'"<] ) /smx
972	45	50				391	and return $opt{test} ? 1 : $string;
		50
973
974	0					0	return;
975							}
976
977							sub _unquote {
978	7			7		16	my ( $string ) = @_;
979	7	100				92	$string =~ s/ \A ['"] //smx
980							and chop $string;
981	7					23	$string =~ s/ \\ (?= . ) //smxg;
982	7					19	return $string;
983							}
984
985							1;
986
987							__END__