File Coverage

blib/lib/String/Util.pm

Criterion	Covered	Total	%
statement	130	168	77.3
branch	41	60	68.3
condition	10	20	50.0
subroutine	25	29	86.2
pod	21	22	95.4
total	227	299	75.9

line	stmt	bran	cond	sub	pod	time	code
1							package String::Util;
2
3	1			1		504	use strict;
	1					7
	1					26
4	1			1		5	use warnings;
	1					1
	1					23
5	1			1		4	use Carp;
	1					2
	1					82
6	1			1		20	use v5.14;
	1					3
7
8							# version
9							our $VERSION = '1.34';
10							our $FGC_MODE = 'UTF-8';
11
12							#------------------------------------------------------------------------------
13							# opening POD
14							#
15
16							=head1 NAME
17
18							B -- String processing utility functions
19
20							=head1 DESCRIPTION
21
22							B provides a collection of small, handy functions for processing
23							strings in various ways.
24
25							=head1 INSTALLATION
26
27							cpanm String::Util
28
29							=head1 USAGE
30
31							No functions are exported by default, they must be specified:
32
33							use String::Util qw(trim eqq contains)
34
35							alternately you can use C<:all> to export B of the functions
36
37							use String::Util qw(:all)
38
39							=head1 FUNCTIONS
40
41							=cut
42
43							#
44							# opening POD
45							#------------------------------------------------------------------------------
46
47
48
49							#------------------------------------------------------------------------------
50							# export
51							#
52	1			1		6	use base 'Exporter';
	1					2
	1					92
53	1			1		7	use vars qw[@EXPORT_OK %EXPORT_TAGS];
	1					1
	1					2105
54
55							# the following functions accept a value and return a modified version of
56							# that value
57							push @EXPORT_OK, qw[
58							collapse htmlesc trim ltrim
59							rtrim repeat unquote no_space
60							nospace jsquote crunchlines
61							file_get_contents
62							];
63
64							# the following functions return true or false based on their input
65							push @EXPORT_OK, qw[
66							hascontent nocontent eqq neqq
67							startswith endswith contains sanitize
68							];
69
70							# the following function returns the unicode values of a string
71							push @EXPORT_OK, qw[ ords deords ];
72
73							%EXPORT_TAGS = ('all' => [@EXPORT_OK]);
74							#
75							# export
76							#------------------------------------------------------------------------------
77
78
79							#------------------------------------------------------------------------------
80							# collapse
81							#
82
83							=head2 collapse($string)
84
85							C collapses all whitespace in the string down to single spaces.
86							Also removes all leading and trailing whitespace. Undefined input results in
87							undefined output.
88
89							$var = collapse(" Hello world! "); # "Hello world!"
90
91							=cut
92
93							sub collapse {
94	2			2	1	84	my ($val) = @_;
95
96	2	100				6	if (defined $val) {
97	1					4	$val =~ s\|^\s+\|\|s;
98	1					6	$val =~ s\|\s+$\|\|s;
99	1					5	$val =~ s\|\s+\| \|sg;
100							}
101
102	2					11	return $val;
103							}
104
105							#
106							# collapse
107							#------------------------------------------------------------------------------
108
109
110							#------------------------------------------------------------------------------
111							# hascontent
112							#
113
114							=head2 hascontent($scalar), nocontent($scalar)
115
116							C returns true if the given argument is defined and contains
117							something besides whitespace.
118
119							An undefined value returns false. An empty string returns false. A value
120							containing nothing but whitespace (spaces, tabs, carriage returns, newlines,
121							backspace) returns false. A string containing any other characters (including
122							zero) returns true.
123
124							C returns the negation of C.
125
126							$var = hascontent(""); # False
127							$var = hascontent(" "); # False
128							$var = hascontent("a"); # True
129
130							$var = nocontent(""); # True
131							$var = nocontent("a"); # False
132
133							=cut
134
135							sub hascontent {
136	11			11	1	18	my $val = shift();
137
138	11	100				26	if (!defined($val)) {
139	2					7	return 0;
140							}
141
142							# If there are ANY non-space characters in it
143	9	100				32	if ($val =~ m\|\S\|s) {
144	5					15	return 1;
145							}
146
147	4					12	return 0;
148							}
149
150							sub nocontent {
151	6			6	1	13	my $str = shift();
152
153							# nocontent() is just the inverse to hascontent()
154	6					12	my $ret = !(hascontent($str));
155
156	6					23	return $ret;
157							}
158
159							#
160							# hascontent
161							#------------------------------------------------------------------------------
162
163
164							#------------------------------------------------------------------------------
165							# trim
166							#
167
168							=head2 trim($string), ltrim($string), rtrim($string)
169
170							Returns the string with all leading and trailing whitespace removed.
171
172							$var = trim(" my string "); # "my string"
173
174							C trims B whitespace only.
175
176							C trims B whitespace only.
177
178							=cut
179
180							sub trim {
181	5			5	1	11	my $s = shift();
182
183	5	100				12	if (!defined($s)) {
184	1					3	return undef;
185							}
186
187	4					29	$s =~ s/^\s*//u;
188	4					19	$s =~ s/\s*$//u;
189
190	4					17	return $s;
191							}
192							#
193							# trim
194							#------------------------------------------------------------------------------
195
196
197							#------------------------------------------------------------------------------
198							# ltrim, rtrim
199							#
200
201							sub ltrim {
202	2			2	1	5	my $s = shift();
203
204	2	100				23	if (!defined($s)) {
205	1					6	return undef;
206							}
207
208	1					6	$s =~ s/^\s*//u;
209
210	1					4	return $s;
211							}
212
213							sub rtrim {
214	2			2	1	5	my $s = shift();
215
216	2	100				7	if (!defined($s)) {
217	1					4	return undef;
218							}
219
220	1					8	$s =~ s/\s*$//u;
221
222	1					4	return $s;
223							}
224
225							#
226							# ltrim, rtrim
227							#------------------------------------------------------------------------------
228
229
230
231							#------------------------------------------------------------------------------
232							# no_space
233							#
234
235							=head2 nospace($string)
236
237							Removes B whitespace characters from the given string. This includes spaces
238							between words.
239
240							$var = nospace(" Hello World! "); # "HelloWorld!"
241
242							=cut
243
244							sub no_space {
245	0			0	0	0	return nospace(@_);
246							}
247
248							# alias nospace to no_space
249							sub nospace {
250	3			3	1	6	my $val = shift();
251
252	3	100				10	if (defined $val) {
253	2					10	$val =~ s\|\s+\|\|gs;
254							}
255
256	3					19	return $val;
257							}
258
259							#
260							# no_space
261							#------------------------------------------------------------------------------
262
263
264
265							#------------------------------------------------------------------------------
266							# htmlesc
267							#
268
269							=head2 htmlesc($string)
270
271							Formats a string for literal output in HTML. An undefined value is returned as
272							an empty string.
273
274							htmlesc() is very similar to CGI.pm's escapeHTML. However, there are a few
275							differences. htmlesc() changes an undefined value to an empty string, whereas
276							escapeHTML() returns undefs as undefs.
277
278							=cut
279
280							sub htmlesc {
281	2			2	1	5	my ($val) = @_;
282
283	2	100				6	if (defined $val) {
284	1					5	$val =~ s\|\&\|&\|g;
285	1					3	$val =~ s\|\"\|"\|g;
286	1					3	$val =~ s\|\<\|<\|g;
287	1					3	$val =~ s\|\>\|>\|g;
288							} else {
289	1					2	$val = '';
290							}
291
292	2					8	return $val;
293							}
294							#
295							# htmlesc
296							#------------------------------------------------------------------------------
297
298
299							#------------------------------------------------------------------------------
300							# jsquote
301							#
302
303							=head2 jsquote($string)
304
305							Escapes and quotes a string for use in JavaScript. Escapes single quotes and
306							surrounds the string in single quotes. Returns the modified string.
307
308							=cut
309
310							sub jsquote {
311	1			1	1	6	my ($str) = @_;
312
313	1	50				5	if (!defined($str)) {
314	0					0	return undef;
315							}
316
317							# Escape single quotes.
318	1					7	$str =~ s\|'\|\\'\|gs;
319
320							# Break up anything that looks like a closing HTML tag. This modification
321							# is necessary in an HTML web page. It is unnecessary but harmless if the
322							# output is used in a JavaScript document.
323	1					7	$str =~ s\|
324
325							# break up newlines
326	1					4	$str =~ s\|\n\|\\n\|gs;
327
328							# surround in quotes
329	1					4	$str = qq\|'$str'\|;
330
331							# return
332	1					5	return $str;
333							}
334							#
335							# jsquote
336							#------------------------------------------------------------------------------
337
338
339							#------------------------------------------------------------------------------
340							# unquote
341							#
342
343							=head2 unquote($string)
344
345							If the given string starts and ends with quotes, removes them. Recognizes
346							single quotes and double quotes. The value must begin and end with same type
347							of quotes or nothing is done to the value. Undef input results in undef output.
348							Some examples and what they return:
349
350							unquote(q\|'Hendrix'\|); # Hendrix
351							unquote(q\|"Hendrix"\|); # Hendrix
352							unquote(q\|Hendrix\|); # Hendrix
353							unquote(q\|"Hendrix'\|); # "Hendrix'
354							unquote(q\|O'Sullivan\|); # O'Sullivan
355
356							B braces
357
358							If the braces option is true, surrounding braces such as [] and {} are also
359							removed. Some examples:
360
361							unquote(q\|[Janis]\|, braces=>1); # Janis
362							unquote(q\|{Janis}\|, braces=>1); # Janis
363							unquote(q\|(Janis)\|, braces=>1); # Janis
364
365							=cut
366
367							sub unquote {
368	4			4	1	12	my ($val, %opts) = @_;
369
370	4	50				10	if (defined $val) {
371	4	100	66			44	my $found = $val =~ s\|^\`(.*)\`$\|$1\|s or
372							$val =~ s\|^\"(.*)\"$\|$1\|s or
373							$val =~ s\|^\'(.*)\'$\|$1\|s;
374
375	4	50	33			16	if ($opts{'braces'} && ! $found) {
376	0	0	0			0	$val =~ s\|^\[(.*)\]$\|$1\|s or
377							$val =~ s\|^$(.*)$$\|$1\|s or
378							$val =~ s\|^\{(.*)\}$\|$1\|s;
379							}
380							}
381
382	4					18	return $val;
383							}
384							#
385							# unquote
386							#------------------------------------------------------------------------------
387
388
389							#------------------------------------------------------------------------------
390							# repeat
391							#
392
393							=head2 repeat($string, $count)
394
395							Returns the given string repeated the given number of times. The following
396							command outputs "Fred" three times:
397
398							print repeat('Fred', 3), "\n";
399
400							Note that C was created a long time based on a misunderstanding of how
401							the perl operator 'x' works. The following command using C would perform
402							exactly the same as the above command.
403
404							print 'Fred' x 3, "\n";
405
406							Use whichever you prefer.
407
408							=cut
409
410							sub repeat {
411	0			0	1	0	my ($val, $count) = @_;
412	0					0	return ($val x int($count));
413							}
414							#
415							# repeat
416							#------------------------------------------------------------------------------
417
418
419							#------------------------------------------------------------------------------
420							# eqq
421							# formerly equndef
422							#
423
424							=head2 eqq($scalar1, $scalar2)
425
426							Returns true if the two given values are equal. Also returns true if both
427							are C. If only one is C, or if they are both defined but different,
428							returns false. Here are some examples and what they return.
429
430							$var = eqq('x', 'x'); # True
431							$var = eqq('x', undef); # False
432							$var = eqq(undef, undef); # True
433
434							=cut
435
436							sub eqq {
437	8			8	1	17	my ($str1, $str2) = @_;
438
439							# if both defined
440	8	100	100			35	if ( defined($str1) && defined($str2) ) {
441	4					19	return $str1 eq $str2
442							}
443
444							# if neither are defined
445	4	50	66			15	if ( (!defined($str1)) && (!defined($str2)) ) {
446	2					13	return 1
447							}
448
449							# only one is defined, so return false
450	2					7	return 0;
451							}
452							#
453							# eqq
454							#------------------------------------------------------------------------------
455
456
457							#------------------------------------------------------------------------------
458							# neqq
459							# formerly neundef
460							#
461
462							=head2 neqq($scalar1, $scalar2)
463
464							The opposite of C, returns true if the two values are not the same.
465							Here are some examples and what they return.
466
467							$var = neqq('x', 'x'); # False
468							$var = neqq('x', undef); # True
469							$var = neqq(undef, undef); # False
470
471							=cut
472
473							sub neqq {
474	4	100		4	1	12	return eqq(@_) ? 0 : 1;
475							}
476							#
477							# neqq
478							#------------------------------------------------------------------------------
479
480
481							#------------------------------------------------------------------------------
482							# ords
483							#
484
485							=head2 ords($string)
486
487							Returns the given string represented as the ascii value of each character.
488
489							$var = ords('Hendrix'); # {72}{101}{110}{100}{114}{105}{120}
490
491							B
492
493							=over 4
494
495							=item * convert_spaces=>[true\|false]
496
497							If convert_spaces is true (which is the default) then spaces are converted to
498							their matching ord values. So, for example, this code:
499
500							$var = ords('a b', convert_spaces=>1); # {97}{32}{98}
501
502							This code returns the same thing:
503
504							$var = ords('a b'); # {97}{32}{98}
505
506							If convert_spaces is false, then spaces are just returned as spaces. So this
507							code:
508
509							ords('a b', convert_spaces=>0); # {97} {98}
510
511
512							=item * alpha_nums
513
514							If the alpha_nums option is false, then characters 0-9, a-z, and A-Z are not
515							converted. For example, this code:
516
517							$var = ords('a=b', alpha_nums=>0); # a{61}b
518
519							=back
520
521							=cut
522
523							sub ords {
524	0			0	1	0	my ($str, %opts) = @_;
525	0					0	my (@rv, $show_chars);
526
527							# default options
528	0					0	%opts = (convert_spaces=>1, alpha_nums=>1, %opts);
529
530							# get $show_chars option
531	0					0	$show_chars = $opts{'show_chars'};
532
533							# split into individual characters
534	0					0	@rv = split '', $str;
535
536							# change elements to their unicode numbers
537							CHAR_LOOP:
538	0					0	foreach my $char (@rv) {
539							# don't convert space if called so
540	0	0	0			0	if ( (! $opts{'convert_spaces'}) && ($char =~ m\|^\s$\|s) )
541	0					0	{ next CHAR_LOOP }
542
543							# don't convert alphanums
544	0	0				0	if (! $opts{'alpha_nums'}) {
545	0	0				0	if ( $char =~ m\|^[a-z0-9]$\|si) {
546	0					0	next CHAR_LOOP;
547							}
548							}
549
550	0					0	my $rv = '{';
551
552	0	0				0	if ($show_chars)
553	0					0	{ $rv .= $char . ':' }
554
555	0					0	$rv .= ord($char) . '}';
556
557	0					0	$char = $rv;
558							}
559
560							# return words separated by spaces
561	0					0	return join('', @rv);
562							}
563							#
564							# ords
565							#------------------------------------------------------------------------------
566
567
568							#------------------------------------------------------------------------------
569							# deords
570							#
571
572							=head2 deords($string)
573
574							Takes the output from C and returns the string that original created that
575							output.
576
577							$var = deords('{72}{101}{110}{100}{114}{105}{120}'); # 'Hendrix'
578
579							=cut
580
581							sub deords {
582	0			0	1	0	my ($str) = @_;
583	0					0	my (@tokens, $rv);
584	0					0	$rv = '';
585
586							# get tokens
587	0					0	@tokens = split(m\|[\{\}]\|s, $str);
588	0					0	@tokens = grep {length($_)} @tokens;
	0					0
589
590							# build return string
591	0					0	foreach my $token (@tokens) {
592	0					0	$rv .= chr($token);
593							}
594
595							# return
596	0					0	return $rv;
597							}
598							#
599							# deords
600							#------------------------------------------------------------------------------
601
602							=head2 contains($string, $substring)
603
604							Checks if the string contains substring
605
606							$var = contains("Hello world", "Hello"); # true
607							$var = contains("Hello world", "llo wor"); # true
608							$var = contains("Hello world", "QQQ"); # false
609
610							# Also works with grep
611							@arr = grep { contains("cat") } @input;
612
613							=cut
614
615							sub contains {
616	5			5	1	12	my ($str, $substr) = @_;
617
618	5	100				14	if (!defined($str)) {
619	1					5	return undef;
620							}
621
622	4	50				7	if (!$substr) {
623	0					0	$substr = $str;
624	0					0	$str = $_;
625							}
626
627	4					10	my $ret = index($str, $substr, 0) != -1;
628
629	4					16	return $ret;
630							}
631
632							=head2 startswith($string, $substring)
633
634							Checks if the string starts with the characters in substring
635
636							$var = startwith("Hello world", "Hello"); # true
637							$var = startwith("Hello world", "H"); # true
638							$var = startwith("Hello world", "Q"); # false
639
640							# Also works with grep
641							@arr = grep { startswith("X") } @input;
642
643							=cut
644
645							sub startswith {
646	5			5	1	17	my ($str, $substr) = @_;
647
648	5	100				14	if (!defined($str)) {
649	1					4	return undef;
650							}
651
652	4	50				9	if (!$substr) {
653	0					0	$substr = $str;
654	0					0	$str = $_;
655							}
656
657	4					10	my $ret = index($str, $substr, 0) == 0;
658
659	4					15	return $ret;
660							}
661
662							=head2 endswith($string, $substring)
663
664							Checks if the string ends with the characters in substring
665
666							$var = endswith("Hello world", "world"); # true
667							$var = endswith("Hello world", "d"); # true
668							$var = endswith("Hello world", "QQQ"); # false
669
670							# Also works with grep
671							@arr = grep { endswith("z") } @input;
672
673							=cut
674
675							sub endswith {
676	5			5	1	16	my ($str, $substr) = @_;
677
678	5	100				12	if (!defined($str)) {
679	1					5	return undef;
680							}
681
682	4	50				9	if (!$substr) {
683	0					0	$substr = $str;
684	0					0	$str = $_;
685							}
686
687	4					6	my $len = length($substr);
688	4					7	my $start = length($str) - $len;
689
690	4					11	my $ret = index($str, $substr, $start) != -1;
691
692	4					19	return $ret;
693							}
694
695							#------------------------------------------------------------------------------
696							# crunchlines
697							#
698
699							=head2 crunchlines($string)
700
701							Compacts contiguous newlines into single newlines. Whitespace between newlines
702							is ignored, so that two newlines separated by whitespace is compacted down to a
703							single newline.
704
705							$var = crunchlines("x\n\n\nx"); # "x\nx";
706
707							=cut
708
709							sub crunchlines {
710	3			3	1	9	my ($str) = @_;
711
712	3	100				9	if (!defined($str)) {
713	1					4	return undef;
714							}
715
716	2					16	while($str =~ s\|\n[ \t]*\n\|\n\|gs)
717							{}
718
719	2					5	$str =~ s\|^\n\|\|s;
720	2					24	$str =~ s\|\n$\|\|s;
721
722	2					10	return $str;
723							}
724							#
725							# crunchlines
726							#------------------------------------------------------------------------------
727
728							=head2 sanitize($string, $separator = "_")
729
730							Sanitize all non alpha-numeric characters in a string to underscores.
731							This is useful to take a URL, or filename, or text description and know
732							you can use it safely in a URL or a filename.
733
734							B This will remove any trailing or leading '_' on the string
735
736							$var = sanitize("http://www.google.com/") # http_www_google_com
737							$var = sanitize("foo_bar()"; # foo_bar
738							$var = sanitize("/path/to/file.txt"); # path_to_file_txt
739							$var = sanitize("Big yellow bird!", "."); # Big.yellow.bird
740
741							=cut
742
743							sub sanitize {
744	4			4	1	8	my $str = shift();
745	4		100			17	my $sep = shift() // "_";
746
747	4	50				10	if (!defined($str)) {
748	0					0	return undef;
749							}
750
751							# Convert multiple non-word sequences to the separator
752	4					28	$str =~ s/[\W_]+/$sep/g;
753
754							# The separator is a literal character so we quotemeta it
755	4					9	$sep = quotemeta($sep);
756							# Remove any separators at the beginning and end
757	4					37	$str =~ s/\A$sep+//;
758	4					31	$str =~ s/$sep+\z//;
759
760	4					19	return $str;
761							}
762
763							###########################################################################
764
765							=head2 file_get_contents($string, $boolean)
766
767							Read an entire file from disk into a string. Returns undef if the file
768							cannot be read for any reason. Can also return the file as an array of
769							lines.
770
771							$str = file_get_contents("/tmp/file.txt"); # Return a string
772							@lines = file_get_contents("/tmp/file.txt", 1); # Return an array
773
774							B If you opt to return an array, carriage returns and line feeds are
775							removed from the end of each line.
776
777							B File is read in B mode, unless C<$FGC_MODE> is set to an
778							appropriate encoding.
779
780							=cut
781
782							sub file_get_contents {
783	2			2	1	9	my ($file, $ret_array) = @_;
784
785	2	50				88	open (my $fh, "<", $file) or return undef;
786	1			1		7	binmode($fh, ":encoding($FGC_MODE)");
	1					3
	1					6
	2					51
787
788	2	100				11897	if ($ret_array) {
789	1					3	my @ret;
790
791	1					19	while (my $line = readline($fh)) {
792	236					1478	$line =~ s/[\r\n]*$//; # Remove CR/LF
793	236					740	push(@ret, $line);
794							}
795
796	1					104	return @ret;
797							} else {
798	1					3	my $ret = '';
799	1					34	while (my $line = readline($fh)) {
800	236					561	$ret .= $line;
801							}
802
803	1					49	return $ret;
804							}
805							}
806
807							# return true
808							1;
809
810
811							__END__