File Coverage

blib/lib/Text/Extract/MaketextCallPhrases.pm

Criterion	Covered	Total	%
statement	222	231	96.1
branch	130	154	84.4
condition	84	116	72.4
subroutine	9	9	100.0
pod	2	2	100.0
total	447	512	87.3

line	stmt	bran	cond	sub	pod	time	code
1							package Text::Extract::MaketextCallPhrases;
2
3	8			8		123650	use strict;
	8					12
	8					182
4	8			8		25	use warnings;
	8					10
	8					231
5
6							$Text::Extract::MaketextCallPhrases::VERSION = '0.94';
7
8	8			8		4726	use Text::Balanced ();
	8					102513
	8					226
9	8			8		3055	use String::Unquotemeta ();
	8					1365
	8					151
10
11							# So we don't have to maintain an identical regex
12	8			8		2968	use Module::Want 0.6 ();
	8					6017
	8					304
13							my $ns_regexp = Module::Want::get_ns_regexp();
14
15							my $NO_EXTRACT_KEY = '## no extract maketext';
16
17							sub import {
18	8			8		36	no strict 'refs'; ## no critic
	8					7
	8					16560
19	8			8		67	*{ caller() . '::get_phrases_in_text' } = \&get_phrases_in_text;
	8					45
20	8					12	*{ caller() . '::get_phrases_in_file' } = \&get_phrases_in_file;
	8					6281
21							}
22
23							my $default_regexp_conf_item = [
24							qr/(?:(?:^\|\:\|\s\|=\|\(\|\.\|\b)translatable\|(?:make\|lex)text(?:_[a-zA-Z0-9_]+_context)?)\s*\(?/,
25							sub { return substr( $_[0], -1, 1 ) eq '(' ? qr/\s\)/ : qr/\s\;/ },
26							];
27
28							sub get_phrases_in_text {
29
30							# 3rd arg is used internally to get the line number in the 'debug_ignored_matches' results when called via get_phrases_in_file(). Don't rely on this as it may change.
31	58			58	1	159764	my ( $text, $conf_hr, $linenum ) = @_; # 3rd arg is used internally to get the line number in the 'debug_ignored_matches' results when called via get_phrases_in_file(). Don't rely on this as it may change.
32
33	58		100			210	$conf_hr \|\|= {};
34
35	58	50				142	if ( $conf_hr->{'encode_unicode_slash_x'} ) {
36	0	0				0	Module::Want::have_mod('Encode') \|\| die $@;
37							}
38
39	58	100	66			168	if ( $conf_hr->{'cpanel_mode'} && $conf_hr->{'cpanel_mode'} != 0 ) {
40	1					2	$conf_hr->{'cpanel_mode'} = '0E0';
41	1					2	push @{ $conf_hr->{'regexp_conf'} }, [ qr/\/ ], [ qr/(?:^\|[^<])cptext\s$/, qr/\s$/ ], [ qr/Cpanel::Exception(?:::$ns_regexp)?->new$/, qr/\s$/ ], [ qr/Cpanel::Exception(?:::$ns_regexp)?::create$/, qr/\s$/, { 'optional' => 1, 'arg_position' => 2 } ], [ qr/Cpanel::Exception(?:::$ns_regexp)?\-\>create$/, qr/\s*$/, { 'optional' => 1 } ],
	1					92
42							[ qr/Cpanel::LocaleString->new$\s/, qr/\s$/ ];
43							}
44
45	58					61	my @results;
46							my %offset_seen;
47
48							# I like this alignment better than what tidy does, seems clearer to me even if a bit overkill perhaps
49							#tidyoff
50	58	100				153	for my $regexp (
		100
51							$conf_hr->{'regexp_conf'} ? (
52	10					26	$conf_hr->{'no_default_regex'} ? @{ $conf_hr->{'regexp_conf'} }
53	3					8	: ( $default_regexp_conf_item, @{ $conf_hr->{'regexp_conf'} } )
54							)
55							: ($default_regexp_conf_item)
56							) {
57							#tidyon
58	66					71	my $text_working_copy = $text;
59	66					66	my $original_len = length($text_working_copy);
60
61	66	100	66			279	my $rx_conf_hr = defined $regexp->[2] && ref( $regexp->[2] ) eq 'HASH' ? $regexp->[2] : { 'optional' => 0, 'arg_position' => 0 };
62	66	100				168	$rx_conf_hr->{arg_position} = exists $rx_conf_hr->{arg_position} ? int( abs( $rx_conf_hr->{arg_position} ) ) : 0; # if caller passes a non-numeric value this should warn, that is a feature!
63
64	66					230	my $token_rx = qr/$regexp->[0]/;
65	66					61	my ( $did_match, $matched, $no_extract_index );
66	66					61	while (1) {
67	583	50				794	last if !defined $text_working_copy;
68
69	583					4597	$did_match = $text_working_copy =~ $token_rx;
70
71	583	100				677	if ($did_match) {
72	517					1081	$no_extract_index = index(
73							substr( $text_working_copy, 0, $+[0] ),
74							$NO_EXTRACT_KEY,
75							);
76	517					1332	$matched = substr( $text_working_copy, $-[0], $+[0] - $-[0] );
77							}
78							else {
79	66					94	$no_extract_index = index( $text_working_copy, $NO_EXTRACT_KEY );
80	66	50				247	last if -1 == $no_extract_index;
81							}
82
83							# we have a (possibly multiline) chunk w/ notation-not-preceded-by-token that we should ignore
84	517	50	33			904	if ( -1 != $no_extract_index && ( !$did_match \|\| ( $no_extract_index < $-[0] ) ) ) {
			66
85	4					31	$text_working_copy =~ s/.* \Q$NO_EXTRACT_KEY\E [^\n]*//x;
86	4					7	next;
87							}
88
89	513					343	my $pre;
90
91							# TODO: incorporate the \s* into results: 'post_token_ws' => $1 \|\| '' ?
92	513					5560	( $pre, $text_working_copy ) = split( m/(?:$regexp->[0]\|$NO_EXTRACT_KEY)\s/, $text_working_copy, 2 ); # the \s takes into account trailing WS that Text::Balanced ignores which then can throw off the offset
93
94							# we have a token line that we should ignore
95	513	100				1996	next if $text_working_copy =~ s/^[^\n]* \Q$NO_EXTRACT_KEY\E [^\n]*//x;
96
97	507					452	my $offset = $original_len - length($text_working_copy);
98
99	507					307	my $phrase;
100	507					1130	my $result_hr = { 'is_error' => 0, 'is_warning' => 0, 'offset' => $offset, 'regexp' => $regexp, 'matched' => $matched };
101
102	507	100				773	if ( $conf_hr->{'ignore_perlish_comments'} ) {
103
104							# ignore matches in a comment
105	6	100	100			25	if ( $pre =~ m/\#/ && $pre !~ m/[\n\r]$/ ) {
106	2					6	my @lines = split( /[\n\r]+/, $pre );
107
108	2	100				6	if ( $lines[-1] =~ m/\#/ ) {
109	1					3	$result_hr->{'type'} = 'comment';
110	1	50				3	$result_hr->{'line'} = $linenum if defined $linenum;
111	1					1	push @{ $conf_hr->{'debug_ignored_matches'} }, $result_hr;
	1					2
112	1					2	next;
113							}
114							}
115							}
116
117							# ignore functions named *$1
118	506	100	100			1253	if ( $text_working_copy =~ m/^\s\{/ && $matched !~ m/\(\s$/ ) {
119	4					8	$result_hr->{'type'} = 'function';
120	4	50				6	$result_hr->{'line'} = $linenum if defined $linenum;
121	4					4	push @{ $conf_hr->{'debug_ignored_matches'} }, $result_hr;
	4					52
122	4					28	next;
123							}
124
125							# ignore assignments to things named *maketext
126	502	100				864	if ( $text_working_copy =~ m/^\s*=/ ) {
127	2					4	$result_hr->{'type'} = 'assignment';
128	2	50				4	$result_hr->{'line'} = $linenum if defined $linenum;
129	2					2	push @{ $conf_hr->{'debug_ignored_matches'} }, $result_hr;
	2					3
130	2					3	next;
131							}
132
133	500	100				626	if ( $conf_hr->{'ignore_perlish_statement'} ) {
134
135							# ignore a statement named *maketext (e.g. goto &XYZ::maketext;)
136	3	100				6	if ( $text_working_copy =~ m/^\s*;/ ) {
137	1					2	$result_hr->{'type'} = 'statement';
138	1	50				2	$result_hr->{'line'} = $linenum if defined $linenum;
139	1					2	push @{ $conf_hr->{'debug_ignored_matches'} }, $result_hr;
	1					2
140	1					1	next;
141							}
142							}
143
144							# phrase is argument N (instead of first)
145	499	100				666	if ( $rx_conf_hr->{'arg_position'} > 0 ) {
146
147							# hack away the args before the one at $arg_position
148	25					61	for my $at_index ( 1 .. $rx_conf_hr->{'arg_position'} ) {
149	55					1621	$text_working_copy =~ s{^\s\,\s}{};
150	55	100				99	if ( $at_index >= $rx_conf_hr->{'arg_position'} ) {
151	25					27	$result_hr->{'offset'} = $original_len - length($text_working_copy);
152	25					29	last;
153							}
154
155	30					60	( $phrase, $text_working_copy ) = Text::Balanced::extract_variable($text_working_copy);
156
157	30	50				681	if ( !defined $phrase ) {
158	30					58	( $phrase, $text_working_copy ) = Text::Balanced::extract_quotelike($text_working_copy);
159							}
160							}
161							}
162
163	499					865	( $phrase, $text_working_copy ) = Text::Balanced::extract_variable($text_working_copy);
164	499	100				15155	my $optional_perlish =
		100
165							$text_working_copy =~ m/^\s*\[/ ? "ARRAY"
166							: $text_working_copy =~ m/^\s*\{/ ? "HASH"
167							: 0;
168
169	499	100				545	if ( !$phrase ) {
170
171							# undef $@;
172	469					360	my ( $type, $inside, $opener, $closer );
173	469					689	( $phrase, $text_working_copy, undef, $type, $opener, $inside, $closer ) = Text::Balanced::extract_quotelike($text_working_copy);
174	469	50				29189	$text_working_copy = '' if !defined $text_working_copy;
175
176	469	100	100			2307	$result_hr->{'quotetype'} = 'single' if ( defined $opener && $opener eq "'" ) \|\| ( defined $type && ( $type eq 'q' \|\| $type eq 'qw' ) );
			100
			66
			66
177	469	100	100			1937	$result_hr->{'quotetype'} = 'double' if ( defined $opener && $opener eq '"' ) \|\| ( defined $type && $type eq 'qq' );
			100
			66
178	469	100				645	if ( $result_hr->{'quotetype'} ) {
179	290					394	$result_hr->{'quote_before'} = $type . $opener;
180	290					464	$result_hr->{'quote_after'} = $closer;
181							}
182
183	469	100	100			1211	if ( defined $type && $type eq '<<' ) {
184	22					36	$result_hr->{'quote_before'} = $type . $opener;
185	22					22	$result_hr->{'quote_after'} = $closer;
186
187	22					29	$result_hr->{'heredoc'} = $opener;
188	22	100				37	if ( substr( $opener, 0, 1 ) eq "'" ) {
189	19					21	$result_hr->{'quotetype'} = 'single';
190							}
191							else {
192	3					5	$result_hr->{'quotetype'} = 'double';
193							}
194							}
195
196	469	100	66			2200	if ( defined $inside && ( exists $result_hr->{'quotetype'} ) && $inside eq '' ) {
		100	100
		50	66
			33
			33
			0
			0
197	30					26	$result_hr->{'is_error'} = 1;
198	30					30	$result_hr->{'type'} = 'empty';
199	30					34	$phrase = $inside;
200							}
201							elsif ( defined $inside && $inside ) {
202	324					303	$phrase = $inside;
203
204	324	100	100			2643	if ( $type eq 'qw' ) {
		100	100
		100	100
			100
			100
205	21	100				53	if ( $phrase =~ m/\A(\s+)/ ) {
206	2					3	$result_hr->{'quote_before'} .= $1;
207	2					5	$phrase =~ s/\A(\s+)//;
208							}
209	21	100				59	if ( $phrase =~ m/(\s+)\z/ ) {
210	2					3	$result_hr->{'quote_after'} = $1 . $result_hr->{'quote_after'};
211	2					5	$phrase =~ s/(\s+)\z//;
212							}
213
214	21	100				43	if ( $phrase =~ m/(\s+)/ ) {
215	19					36	$result_hr->{'quote_after'} = $1;
216							}
217
218							# otherwise leave quote_after asis for cases like this: qw(foo)
219
220	21					53	($phrase) = split( /\s+/, $phrase, 2 );
221							}
222							elsif ( $type eq 'qx' \|\| $opener eq '`' ) {
223	12					14	$result_hr->{'is_warning'} = 1;
224	12					14	$result_hr->{'type'} = 'command';
225							}
226							elsif ( $type eq 'm' \|\| $type eq 'qr' \|\| $type eq 's' \|\| $type eq 'tr' \|\| $opener eq '/' ) {
227	30					30	$result_hr->{'is_warning'} = 1;
228	30					37	$result_hr->{'type'} = 'pattern';
229							}
230							}
231							elsif ( defined $opener && defined $inside && defined $closer && defined $phrase && $phrase eq "$opener$inside$closer" ) {
232	0					0	$result_hr->{'is_error'} = 1;
233	0					0	$result_hr->{'type'} = 'empty';
234	0					0	$phrase = $inside;
235							}
236							else {
237	115					84	my $is_no_arg = 0;
238	115	50				178	if ( defined $regexp->[1] ) {
239	115	100				204	if ( ref( $regexp->[1] ) eq 'CODE' ) {
		50
240	89					104	my $rgx = $regexp->[1]->($matched);
241	89	100				779	if ( $text_working_copy =~ m/^$rgx/ ) {
242	37					52	$is_no_arg = 1;
243							}
244							}
245							elsif ( ref( $regexp->[1] ) eq 'Regexp' ) {
246	26					100	my $rgx = qr/^$regexp->[1]/;
247	26	100				97	if ( $text_working_copy =~ $rgx ) {
248	15					27	$is_no_arg = 1;
249							}
250							}
251							}
252
253	115	100				643	if ($is_no_arg) {
		100
254	52	100				72	if ( $rx_conf_hr->{'optional'} ) {
255	13					25	next;
256							}
257							else {
258	39					38	$result_hr->{'is_error'} = 1;
259	39					61	$result_hr->{'type'} = 'no_arg';
260							}
261							}
262							elsif ( $text_working_copy =~ m/^\s(((?:\&\|\\\)?)$ns_regexp(?:\-\>$ns_regexp)?((?:\s*\()?))/o ) {
263	44					86	$phrase = $1;
264	44	100	66			144	my $perlish = $2 \|\| $3 ? 1 : 0;
265
266	44					366	$text_working_copy =~ s/\s(?:\&\|\\\)?$ns_regexp(?:\-\>$ns_regexp)?(?:\s\()?\s//o;
267
268	44					60	$result_hr->{'is_warning'} = 1;
269	44	100				87	$result_hr->{'type'} = $perlish ? 'perlish' : 'bareword';
270							}
271							}
272							}
273							else {
274	30					31	$result_hr->{'is_warning'} = 1;
275	30					36	$result_hr->{'type'} = 'perlish';
276							}
277
278	486	100				569	if ( !defined $phrase ) {
279	58					45	my $is_no_arg = 0;
280	58	50				104	if ( defined $regexp->[1] ) {
281	58	100				89	if ( ref( $regexp->[1] ) eq 'CODE' ) {
		50
282	49					56	my $rgx = $regexp->[1]->($matched);
283	49	100				409	if ( $text_working_copy =~ m/^$rgx/ ) {
284	37					44	$is_no_arg = 1;
285							}
286							}
287							elsif ( ref( $regexp->[1] ) eq 'Regexp' ) {
288	9					38	my $rgx = qr/^$regexp->[1]/;
289	9	100				23	if ( $text_working_copy =~ $rgx ) {
290	2					4	$is_no_arg = 1;
291							}
292							}
293							}
294
295	58	100				85	if ($is_no_arg) {
296	39	50				47	if ( $rx_conf_hr->{'optional'} ) {
297	0					0	next;
298							}
299							else {
300	39					33	$result_hr->{'is_error'} = 1;
301	39					43	$result_hr->{'type'} = 'no_arg';
302							}
303							}
304							else {
305	19	100				24	if ($optional_perlish) {
306	7	100				10	if ( $rx_conf_hr->{'optional'} ) {
307	3					6	next;
308							}
309							else {
310	4					4	$result_hr->{'is_warning'} = 1;
311	4					5	$result_hr->{'type'} = 'perlish';
312	4					4	$phrase = $optional_perlish;
313							}
314							}
315							else {
316	12					13	$result_hr->{'is_warning'} = 1;
317	12					15	$result_hr->{'type'} = 'multiline';
318							}
319							}
320							}
321							else {
322	428					470	$result_hr->{'original_text'} = $phrase;
323
324							# make sure its wasn't a tricky variable in quotes like maketext("$foo->{zip}")
325							# '$foo->{zip}' ' $foo->{zip} ' " $foo->{zip} " to but that seems like a good idea to flag as wonky and in need of human follow up
326	428					726	my ( $var, $for, $aft ) = Text::Balanced::extract_variable($phrase);
327	428	100	66			12818	if ( $var && defined $for && defined $aft && $for =~ m/\A\s\z/ && $aft =~ m/\A\s\z/ ) {
			66
			33
			33
328	34					46	$result_hr->{'is_warning'} = 1;
329	34					39	$result_hr->{'type'} = 'perlish';
330							}
331							else {
332	394	100				603	if ( exists $result_hr->{'quotetype'} ) {
333	308	100				507	if ( $result_hr->{'quotetype'} eq 'single' ) {
		50
334
335							# escape \n\t etc to preserver them during unquotemeta()
336	206					288	$phrase =~ s{(\\(?:n\|t\|f\|r\|a\|b))}{\\$1}g;
337							}
338							elsif ( $result_hr->{'quotetype'} eq 'double' ) {
339
340							# interpolate \n\t etc
341	102					142	$phrase =~ s{(\\(?:n\|t\|f\|r\|a\|b))}{eval qq{"$1"}}eg;
	6					278
342							}
343							}
344
345	394	50				466	if ( $conf_hr->{'encode_unicode_slash_x'} ) {
346
347							# Turn Unicode string \x{} into bytes strings
348	0					0	$phrase =~ s{(\\x\{[0-9a-fA-F]+\})}{Encode::encode_utf8( eval qq{"$1"} )}eg;
	0					0
349							}
350							else {
351
352							# Preserve Unicode string \x{} for unquotemeta()
353	394					427	$phrase =~ s{(\\)(x\{[0-9a-fA-F]+\})}{$1$1$2}g;
354							}
355
356							# Turn graphemes into characters to avoid quotemeta() problems
357	394					285	$phrase =~ s{((:?\\x[0-9a-fA-F]{2})+)}{eval qq{"$1"}}eg;
	2					98
358	394	100	100			1278	$phrase = String::Unquotemeta::unquotemeta($phrase) unless exists $result_hr->{'type'} && $result_hr->{'type'} eq 'perlish';
359							}
360							}
361
362	483					2241	$result_hr->{'phrase'} = $phrase;
363
364	483	50				1708	push @results, $result_hr if ++$offset_seen{ $result_hr->{'offset'} } == 1;
365							}
366							}
367
368	58					207	return [ sort { $a->{'offset'} <=> $b->{'offset'} } @results ];
	482					575
369							}
370
371							sub get_phrases_in_file {
372	1			1	1	1998	my ( $file, $regex_conf ) = @_;
373
374	1	50				29	open my $fh, '<', $file or return;
375
376	1					2	my @results;
377	1					1	my $prepend = '';
378	1					2	my $linenum = 0;
379	1					1	my $in_multi_line = 0;
380	1					2	my $line; # buffer
381
382	1					15	while ( $line = readline($fh) ) {
383	12					12	$linenum++;
384
385	12					7	my $initial_result_count = @results;
386	12	100				11	push @results, map { $_->{'file'} = $file; $_->{'line'} = $in_multi_line ? $in_multi_line : $linenum; $_ } @{ get_phrases_in_text( $prepend . $line, $regex_conf, $linenum ) };
	9					15
	9					15
	9					13
	12					24
387	12					13	my $updated_result_count = @results;
388
389	12	50	66			107	if ( $in_multi_line && $updated_result_count == $initial_result_count ) {
		100	66
		100	66
			66
			100
			66
390	0					0	$prepend = $prepend . $line;
391	0					0	next;
392							}
393							elsif ( $in_multi_line && $updated_result_count > $initial_result_count && $results[-1]->{'type'} ) {
394	4					4	$prepend = $prepend . $line;
395	4					4	pop @results;
396	4					13	next;
397							}
398							elsif ( !$in_multi_line && @results && defined $results[-1]->{'type'} && $results[-1]->{'type'} eq 'multiline' ) {
399	2					6	$in_multi_line = $linenum;
400	2					2	my $trailing_partial = pop @results;
401
402	2					9	require bytes;
403	2	50				6	my $offset = $trailing_partial->{'offset'} > bytes::length( $prepend . $line ) ? bytes::length( $prepend . $line ) : $trailing_partial->{'offset'};
404	2					11	$prepend = $trailing_partial->{'matched'} . substr( "$prepend$line", $offset );
405	2					7	next;
406							}
407							else {
408	6					5	$in_multi_line = 0;
409	6					20	$prepend = '';
410							}
411							}
412
413	1					6	close $fh;
414
415	1					5	return \@results;
416							}
417
418							1;
419
420							__END__