File Coverage

blib/lib/Text/Extract/MaketextCallPhrases.pm

Criterion	Covered	Total	%
statement	215	224	95.9
branch	127	148	85.8
condition	85	113	75.2
subroutine	9	9	100.0
pod	2	2	100.0
total	438	496	88.3

line	stmt	bran	cond	sub	pod	time	code
1							package Text::Extract::MaketextCallPhrases;
2
3	8			8		157013	use strict;
	8					17
	8					290
4	8			8		39	use warnings;
	8					12
	8					368
5
6							$Text::Extract::MaketextCallPhrases::VERSION = '0.93';
7
8	8			8		6500	use Text::Balanced ();
	8					121689
	8					262
9	8			8		3403	use String::Unquotemeta ();
	8					1629
	8					176
10
11							# So we don't have to maintain an identical regex
12	8			8		3449	use Module::Want 0.6 ();
	8					6475
	8					296
13							my $ns_regexp = Module::Want::get_ns_regexp();
14
15							sub import {
16	8			8		46	no strict 'refs'; ## no critic
	8					10
	8					20257
17	8			8		84	*{ caller() . '::get_phrases_in_text' } = \&get_phrases_in_text;
	8					71
18	8					17	*{ caller() . '::get_phrases_in_file' } = \&get_phrases_in_file;
	8					7740
19							}
20
21							my $default_regexp_conf_item = [
22							qr/(?:(?:^\|\:\|\s\|=\|\(\|\.\|\b)translatable\|(?:make\|lex)text(?:_[a-zA-Z0-9_]+_context)?)\s*\(?/,
23							sub { return substr( $_[0], -1, 1 ) eq '(' ? qr/\s\)/ : qr/\s\;/ },
24							];
25
26							sub get_phrases_in_text {
27
28							# 3rd arg is used internally to get the line number in the 'debug_ignored_matches' results when called via get_phrases_in_file(). Don't rely on this as it may change.
29	58			58	1	228746	my ( $text, $conf_hr, $linenum ) = @_; # 3rd arg is used internally to get the line number in the 'debug_ignored_matches' results when called via get_phrases_in_file(). Don't rely on this as it may change.
30
31	58		100			257	$conf_hr \|\|= {};
32
33	58	50				160	if ( $conf_hr->{'encode_unicode_slash_x'} ) {
34	0	0				0	Module::Want::have_mod('Encode') \|\| die $@;
35							}
36
37	58	100	66			183	if ( $conf_hr->{'cpanel_mode'} && $conf_hr->{'cpanel_mode'} != 0 ) {
38	1					2	$conf_hr->{'cpanel_mode'} = '0E0';
39	1					2	push @{ $conf_hr->{'regexp_conf'} }, [ qr/\/ ], [ qr/(?:^\|[^<])cptext\s$/, qr/\s$/ ], [ qr/Cpanel::Exception(?:::$ns_regexp)?->new$/, qr/\s$/ ], [ qr/Cpanel::Exception(?:::$ns_regexp)?::create$/, qr/\s$/, { 'optional' => 1, 'arg_position' => 2 } ], [ qr/Cpanel::Exception(?:::$ns_regexp)?\-\>create$/, qr/\s*$/, { 'optional' => 1 } ];
	1					91
40							}
41
42	58					70	my @results;
43							my %offset_seen;
44
45							# I like this alignment better than what tidy does, seems clearer to me even if a bit overkill perhaps
46							#tidyoff
47	58	100				170	for my $regexp (
	10	100				28
48							$conf_hr->{'regexp_conf'} ? (
49	3					9	$conf_hr->{'no_default_regex'} ? @{ $conf_hr->{'regexp_conf'} }
50							: ( $default_regexp_conf_item, @{ $conf_hr->{'regexp_conf'} } )
51							)
52							: ($default_regexp_conf_item)
53							) {
54							#tidyon
55	65					106	my $text_working_copy = $text;
56	65					94	my $original_len = length($text_working_copy);
57
58	65	100	66			318	my $rx_conf_hr = defined $regexp->[2] && ref( $regexp->[2] ) eq 'HASH' ? $regexp->[2] : { 'optional' => 0, 'arg_position' => 0 };
59	65	100				214	$rx_conf_hr->{arg_position} = exists $rx_conf_hr->{arg_position} ? int( abs( $rx_conf_hr->{arg_position} ) ) : 0; # if caller passes a non-numeric value this should warn, that is a feature!
60
61	65					1398	my $token_rx = qr/($regexp->[0]\|## no extract maketext)/;
62	65		66			1024	while ( defined $text_working_copy && $text_working_copy =~ m/$token_rx/ ) {
63	513					919	my $matched = $1;
64
65							# we have a (possibly multiline) chunk w/ notation-not-preceeded-by-token that we should ignore
66	513	100				854	if ( $matched eq '## no extract maketext' ) {
67	4					28	$text_working_copy =~ s/.## no extract maketext[^\n]//;
68	4					47	next;
69							}
70
71	509					450	my $pre;
72
73							# TODO: incorporate the \s* into results: 'post_token_ws' => $1 \|\| '' ?
74	509					5808	( $pre, $text_working_copy ) = split( m/$regexp->[0]\s/, $text_working_copy, 2 ); # the \s takes into account trailing WS that Text::Balanced ignores which then can throw off the offset
75
76							# we have a token line that we should ignore
77	509	100				1827	next if $text_working_copy =~ s/^[^\n]## no extract maketext[^\n]//;
78
79	503					704	my $offset = $original_len - length($text_working_copy);
80
81	503					373	my $phrase;
82	503					1664	my $result_hr = { 'is_error' => 0, 'is_warning' => 0, 'offset' => $offset, 'regexp' => $regexp, 'matched' => $matched };
83
84	503	100				1001	if ( $conf_hr->{'ignore_perlish_comments'} ) {
85
86							# ignore matches in a comment
87	6	100	100			28	if ( $pre =~ m/\#/ && $pre !~ m/[\n\r]$/ ) {
88	2					9	my @lines = split( /[\n\r]+/, $pre );
89
90	2	100				7	if ( $lines[-1] =~ m/\#/ ) {
91	1					2	$result_hr->{'type'} = 'comment';
92	1	50				2	$result_hr->{'line'} = $linenum if defined $linenum;
93	1					2	push @{ $conf_hr->{'debug_ignored_matches'} }, $result_hr;
	1					8
94	1					23	next;
95							}
96							}
97							}
98
99							# ignore functions named *$1
100	502	100	100			1758	if ( $text_working_copy =~ m/^\s\{/ && $matched !~ m/\(\s$/ ) {
101	4					12	$result_hr->{'type'} = 'function';
102	4	50				10	$result_hr->{'line'} = $linenum if defined $linenum;
103	4					4	push @{ $conf_hr->{'debug_ignored_matches'} }, $result_hr;
	4					10
104	4					54	next;
105							}
106
107							# ignore assignments to things named *maketext
108	498	100				1158	if ( $text_working_copy =~ m/^\s*=/ ) {
109	2					4	$result_hr->{'type'} = 'assignment';
110	2	50				6	$result_hr->{'line'} = $linenum if defined $linenum;
111	2					2	push @{ $conf_hr->{'debug_ignored_matches'} }, $result_hr;
	2					5
112	2					45	next;
113							}
114
115	496	100				894	if ( $conf_hr->{'ignore_perlish_statement'} ) {
116
117							# ignore a statement named *maketext (e.g. goto &XYZ::maketext;)
118	3	100				8	if ( $text_working_copy =~ m/^\s*;/ ) {
119	1					1	$result_hr->{'type'} = 'statement';
120	1	50				3	$result_hr->{'line'} = $linenum if defined $linenum;
121	1					1	push @{ $conf_hr->{'debug_ignored_matches'} }, $result_hr;
	1					2
122	1					13	next;
123							}
124							}
125
126							# phrase is argument N (instead of first)
127	495	100				941	if ( $rx_conf_hr->{'arg_position'} > 0 ) {
128
129							# hack away the args before the one at $arg_position
130	25					43	for my $at_index ( 1 .. $rx_conf_hr->{'arg_position'} ) {
131	55					1981	$text_working_copy =~ s{^\s\,\s}{};
132	55	100				113	if ( $at_index >= $rx_conf_hr->{'arg_position'} ) {
133	25					57	$result_hr->{'offset'} = $original_len - length($text_working_copy);
134	25					36	last;
135							}
136
137	30					66	( $phrase, $text_working_copy ) = Text::Balanced::extract_variable($text_working_copy);
138
139	30	50				830	if ( !defined $phrase ) {
140	30					50	( $phrase, $text_working_copy ) = Text::Balanced::extract_quotelike($text_working_copy);
141							}
142							}
143							}
144
145	495					1123	( $phrase, $text_working_copy ) = Text::Balanced::extract_variable($text_working_copy);
146	495	100				21249	my $optional_perlish =
		100
147							$text_working_copy =~ m/^\s*\[/ ? "ARRAY"
148							: $text_working_copy =~ m/^\s*\{/ ? "HASH"
149							: 0;
150
151	495	100				750	if ( !$phrase ) {
152
153							# undef $@;
154	465					392	my ( $type, $inside, $opener, $closer );
155	465					891	( $phrase, $text_working_copy, undef, $type, $opener, $inside, $closer ) = Text::Balanced::extract_quotelike($text_working_copy);
156	465	50				39842	$text_working_copy = '' if !defined $text_working_copy;
157
158	465	100	100			3010	$result_hr->{'quotetype'} = 'single' if ( defined $opener && $opener eq "'" ) \|\| ( defined $type && ( $type eq 'q' \|\| $type eq 'qw' ) );
			100
			66
			66
159	465	100	100			2473	$result_hr->{'quotetype'} = 'double' if ( defined $opener && $opener eq '"' ) \|\| ( defined $type && $type eq 'qq' );
			100
			66
160	465	100				837	if ( $result_hr->{'quotetype'} ) {
161	286					485	$result_hr->{'quote_before'} = $type . $opener;
162	286					604	$result_hr->{'quote_after'} = $closer;
163							}
164
165	465	100	100			1386	if ( defined $type && $type eq '<<' ) {
166	22					53	$result_hr->{'quote_before'} = $type . $opener;
167	22					31	$result_hr->{'quote_after'} = $closer;
168
169	22					41	$result_hr->{'heredoc'} = $opener;
170	22	100				46	if ( substr( $opener, 0, 1 ) eq "'" ) {
171	19					28	$result_hr->{'quotetype'} = 'single';
172							}
173							else {
174	3					4	$result_hr->{'quotetype'} = 'double';
175							}
176							}
177
178	465	100	100			2881	if ( defined $inside && ( exists $result_hr->{'quotetype'} ) && $inside eq '' ) {
		100	100
		50	66
			33
			33
			0
			0
179	30					40	$result_hr->{'is_error'} = 1;
180	30					36	$result_hr->{'type'} = 'empty';
181	30					43	$phrase = $inside;
182							}
183							elsif ( defined $inside && $inside ) {
184	320					402	$phrase = $inside;
185
186	320	100	100			3138	if ( $type eq 'qw' ) {
		100	100
		100	100
			100
			100
187	21	100				72	if ( $phrase =~ m/\A(\s+)/ ) {
188	2					4	$result_hr->{'quote_before'} .= $1;
189	2					5	$phrase =~ s/\A(\s+)//;
190							}
191	21	100				82	if ( $phrase =~ m/(\s+)\z/ ) {
192	2					5	$result_hr->{'quote_after'} = $1 . $result_hr->{'quote_after'};
193	2					5	$phrase =~ s/(\s+)\z//;
194							}
195
196	21	100				69	if ( $phrase =~ m/(\s+)/ ) {
197	19					45	$result_hr->{'quote_after'} = $1;
198							}
199
200							# otherwise leave quote_after asis for cases like this: qw(foo)
201
202	21					63	($phrase) = split( /\s+/, $phrase, 2 );
203							}
204							elsif ( $type eq 'qx' \|\| $opener eq '`' ) {
205	12					16	$result_hr->{'is_warning'} = 1;
206	12					28	$result_hr->{'type'} = 'command';
207							}
208							elsif ( $type eq 'm' \|\| $type eq 'qr' \|\| $type eq 's' \|\| $type eq 'tr' \|\| $opener eq '/' ) {
209	30					38	$result_hr->{'is_warning'} = 1;
210	30					50	$result_hr->{'type'} = 'pattern';
211							}
212							}
213							elsif ( defined $opener && defined $inside && defined $closer && defined $phrase && $phrase eq "$opener$inside$closer" ) {
214	0					0	$result_hr->{'is_error'} = 1;
215	0					0	$result_hr->{'type'} = 'empty';
216	0					0	$phrase = $inside;
217							}
218							else {
219	115					106	my $is_no_arg = 0;
220	115	50				226	if ( defined $regexp->[1] ) {
221	115	100				263	if ( ref( $regexp->[1] ) eq 'CODE' ) {
		50
222	89					164	my $rgx = $regexp->[1]->($matched);
223	89	100				1186	if ( $text_working_copy =~ m/^$rgx/ ) {
224	37					85	$is_no_arg = 1;
225							}
226							}
227							elsif ( ref( $regexp->[1] ) eq 'Regexp' ) {
228	26					122	my $rgx = qr/^$regexp->[1]/;
229	26	100				129	if ( $text_working_copy =~ $rgx ) {
230	15					29	$is_no_arg = 1;
231							}
232							}
233							}
234
235	115	100				758	if ($is_no_arg) {
		100
236	52	100				105	if ( $rx_conf_hr->{'optional'} ) {
237	13					124	next;
238							}
239							else {
240	39					49	$result_hr->{'is_error'} = 1;
241	39					72	$result_hr->{'type'} = 'no_arg';
242							}
243							}
244							elsif ( $text_working_copy =~ m/^\s(((?:\&\|\\\)?)$ns_regexp(?:\-\>$ns_regexp)?((?:\s*\()?))/o ) {
245	44					86	$phrase = $1;
246	44	100	66			186	my $perlish = $2 \|\| $3 ? 1 : 0;
247
248	44					486	$text_working_copy =~ s/\s(?:\&\|\\\)?$ns_regexp(?:\-\>$ns_regexp)?(?:\s\()?\s//o;
249
250	44					82	$result_hr->{'is_warning'} = 1;
251	44	100				126	$result_hr->{'type'} = $perlish ? 'perlish' : 'bareword';
252							}
253							}
254							}
255							else {
256	30					49	$result_hr->{'is_warning'} = 1;
257	30					43	$result_hr->{'type'} = 'perlish';
258							}
259
260	482	100				723	if ( !defined $phrase ) {
261	58					58	my $is_no_arg = 0;
262	58	50				118	if ( defined $regexp->[1] ) {
263	58	100				124	if ( ref( $regexp->[1] ) eq 'CODE' ) {
		50
264	49					81	my $rgx = $regexp->[1]->($matched);
265	49	100				626	if ( $text_working_copy =~ m/^$rgx/ ) {
266	37					71	$is_no_arg = 1;
267							}
268							}
269							elsif ( ref( $regexp->[1] ) eq 'Regexp' ) {
270	9					38	my $rgx = qr/^$regexp->[1]/;
271	9	100				29	if ( $text_working_copy =~ $rgx ) {
272	2					3	$is_no_arg = 1;
273							}
274							}
275							}
276
277	58	100				99	if ($is_no_arg) {
278	39	50				77	if ( $rx_conf_hr->{'optional'} ) {
279	0					0	next;
280							}
281							else {
282	39					56	$result_hr->{'is_error'} = 1;
283	39					58	$result_hr->{'type'} = 'no_arg';
284							}
285							}
286							else {
287	19	100				47	if ($optional_perlish) {
288	7	100				14	if ( $rx_conf_hr->{'optional'} ) {
289	3					30	next;
290							}
291							else {
292	4					6	$result_hr->{'is_warning'} = 1;
293	4					5	$result_hr->{'type'} = 'perlish';
294	4					4	$phrase = $optional_perlish;
295							}
296							}
297							else {
298	12					19	$result_hr->{'is_warning'} = 1;
299	12					25	$result_hr->{'type'} = 'multiline';
300							}
301							}
302							}
303							else {
304	424					602	$result_hr->{'original_text'} = $phrase;
305
306							# make sure its wasn't a tricky variable in quotes like maketext("$foo->{zip}")
307							# '$foo->{zip}' ' $foo->{zip} ' " $foo->{zip} " to but that seems like a good idea to flag as wonky and in need of human follow up
308	424					901	my ( $var, $for, $aft ) = Text::Balanced::extract_variable($phrase);
309	424	100	66			17856	if ( $var && defined $for && defined $aft && $for =~ m/\A\s\z/ && $aft =~ m/\A\s\z/ ) {
			66
			33
			33
310	34					60	$result_hr->{'is_warning'} = 1;
311	34					56	$result_hr->{'type'} = 'perlish';
312							}
313							else {
314	390	100				789	if ( exists $result_hr->{'quotetype'} ) {
315	304	100				647	if ( $result_hr->{'quotetype'} eq 'single' ) {
		50
316
317							# escape \n\t etc to preserver them during unquotemeta()
318	206					359	$phrase =~ s{(\\(?:n\|t\|f\|r\|a\|b))}{\\$1}g;
319							}
320							elsif ( $result_hr->{'quotetype'} eq 'double' ) {
321
322							# interpolate \n\t etc
323	98					202	$phrase =~ s{(\\(?:n\|t\|f\|r\|a\|b))}{eval qq{"$1"}}eg;
	6					453
324							}
325							}
326
327	390	50				586	if ( $conf_hr->{'encode_unicode_slash_x'} ) {
328
329							# Turn Unicode string \x{} into bytes strings
330	0					0	$phrase =~ s{(\\x\{[0-9a-fA-F]+\})}{Encode::encode_utf8( eval qq{"$1"} )}eg;
	0					0
331							}
332							else {
333
334							# Preserve Unicode string \x{} for unquotemeta()
335	390					529	$phrase =~ s{(\\)(x\{[0-9a-fA-F]+\})}{$1$1$2}g;
336							}
337
338							# Turn graphemes into characters to avoid quotemeta() problems
339	390					412	$phrase =~ s{((:?\\x[0-9a-fA-F]{2})+)}{eval qq{"$1"}}eg;
	2					152
340	390	100	100			1626	$phrase = String::Unquotemeta::unquotemeta($phrase) unless exists $result_hr->{'type'} && $result_hr->{'type'} eq 'perlish';
341							}
342							}
343
344	479					2961	$result_hr->{'phrase'} = $phrase;
345
346	479	50				10507	push @results, $result_hr if ++$offset_seen{ $result_hr->{'offset'} } == 1;
347							}
348							}
349
350	58					270	return [ sort { $a->{'offset'} <=> $b->{'offset'} } @results ];
	478					797
351							}
352
353							sub get_phrases_in_file {
354	1			1	1	2199	my ( $file, $regex_conf ) = @_;
355
356	1	50				31	open my $fh, '<', $file or return;
357
358	1					1	my @results;
359	1					1	my $prepend = '';
360	1					2	my $linenum = 0;
361	1					2	my $in_multi_line = 0;
362	1					1	my $line; # buffer
363
364	1					10	while ( $line = readline($fh) ) {
365	12					7	$linenum++;
366
367	12					11	my $initial_result_count = @results;
368	12	100				13	push @results, map { $_->{'file'} = $file; $_->{'line'} = $in_multi_line ? $in_multi_line : $linenum; $_ } @{ get_phrases_in_text( $prepend . $line, $regex_conf, $linenum ) };
	9					18
	9					15
	9					13
	12					24
369	12					16	my $updated_result_count = @results;
370
371	12	50	66			113	if ( $in_multi_line && $updated_result_count == $initial_result_count ) {
		100	66
		100	100
			66
			100
			66
372	0					0	$prepend = $prepend . $line;
373	0					0	next;
374							}
375							elsif ( $in_multi_line && $updated_result_count > $initial_result_count && $results[-1]->{'type'} ) {
376	4					6	$prepend = $prepend . $line;
377	4					3	pop @results;
378	4					16	next;
379							}
380							elsif ( !$in_multi_line && @results && defined $results[-1]->{'type'} && $results[-1]->{'type'} eq 'multiline' ) {
381	2					2	$in_multi_line = $linenum;
382	2					3	my $trailing_partial = pop @results;
383
384	2					10	require bytes;
385	2	50				8	my $offset = $trailing_partial->{'offset'} > bytes::length( $prepend . $line ) ? bytes::length( $prepend . $line ) : $trailing_partial->{'offset'};
386	2					9	$prepend = $trailing_partial->{'matched'} . substr( "$prepend$line", $offset );
387	2					9	next;
388							}
389							else {
390	6					8	$in_multi_line = 0;
391	6					22	$prepend = '';
392							}
393							}
394
395	1					7	close $fh;
396
397	1					5	return \@results;
398							}
399
400							1;
401
402							__END__