File Coverage

blib/lib/String/Util.pm

Criterion	Covered	Total	%
statement	139	229	60.7
branch	48	96	50.0
condition	12	29	41.3
subroutine	26	36	72.2
pod	22	30	73.3
total	247	420	58.8

line	stmt	bran	cond	sub	pod	time	code
1							package String::Util;
2
3	1			1		558	use strict;
	1					8
	1					29
4	1			1		5	use warnings;
	1					4
	1					24
5	1			1		5	use Carp;
	1					2
	1					91
6	1			1		25	use 5.014;
	1					3
7
8							# version
9							our $VERSION = '1.32';
10
11
12							#------------------------------------------------------------------------------
13							# opening POD
14							#
15
16							=head1 NAME
17
18							String::Util -- String processing utility functions
19
20							=head1 DESCRIPTION
21
22							String::Util provides a collection of small, handy functions for processing
23							strings in various ways.
24
25							=head1 INSTALLATION
26
27							cpanm String::Util
28
29							=head1 USAGE
30
31							No functions are exported by default, they must be specified:
32
33							use String::Util qw(trim eqq contains)
34
35							alternately you can use C<:all> to export B of the functions
36
37							use String::Util qw(:all)
38
39							=head1 FUNCTIONS
40
41							=cut
42
43							#
44							# opening POD
45							#------------------------------------------------------------------------------
46
47
48
49							#------------------------------------------------------------------------------
50							# export
51							#
52	1			1		9	use base 'Exporter';
	1					2
	1					91
53	1			1		8	use vars qw[@EXPORT_OK %EXPORT_TAGS];
	1					2
	1					3306
54
55							# the following functions accept a value and return a modified version of
56							# that value
57							push @EXPORT_OK, qw[
58							collapse crunch htmlesc trim ltrim
59							rtrim define repeat unquote no_space
60							nospace fullchomp randcrypt jsquote cellfill
61							crunchlines file_get_contents
62							];
63
64							# the following functions return true or false based on their input
65							push @EXPORT_OK, qw[
66							hascontent nocontent eqq equndef neqq neundef
67							startswith endswith contains sanitize
68							];
69
70							# the following function returns a random string of some type
71							push @EXPORT_OK, qw[ randword ];
72
73							# the following function returns the unicode values of a string
74							push @EXPORT_OK, qw[ ords deords ];
75
76							%EXPORT_TAGS = ('all' => [@EXPORT_OK]);
77							#
78							# export
79							#------------------------------------------------------------------------------
80
81
82							#------------------------------------------------------------------------------
83							# collapse
84							#
85
86							=head2 collapse($string)
87
88							C collapses all whitespace in the string down to single spaces.
89							Also removes all leading and trailing whitespace. Undefined input results in
90							undefined output.
91
92							B C is an alias to this function. It is considered deprecated.
93							It may be removed in future versions.
94
95							$var = collapse(" Hello world! "); # "Hello world!"
96
97							=cut
98
99							sub collapse {
100	2			2	1	82	my ($val) = @_;
101
102	2	100				8	if (defined $val) {
103	1					5	$val =~ s\|^\s+\|\|s;
104	1					6	$val =~ s\|\s+$\|\|s;
105	1					4	$val =~ s\|\s+\| \|sg;
106							}
107
108	2					12	return $val;
109							}
110
111							sub crunch {
112	0			0	0	0	return collapse(@_);
113							}
114							#
115							# collapse
116							#------------------------------------------------------------------------------
117
118
119							#------------------------------------------------------------------------------
120							# hascontent
121							#
122
123							=head2 hascontent($scalar), nocontent($scalar)
124
125							hascontent() returns true if the given argument is defined and contains
126							something besides whitespace.
127
128							An undefined value returns false. An empty string returns false. A value
129							containing nothing but whitespace (spaces, tabs, carriage returns, newlines,
130							backspace) returns false. A string containing any other characters (including
131							zero) returns true.
132
133							C returns the negation of C.
134
135							$var = hascontent(""); # False
136							$var = hascontent(" "); # False
137							$var = hascontent("a"); # True
138
139							$var = nocontent(""); # True
140							$var = nocontent("a"); # False
141
142							=cut
143
144							sub hascontent {
145	11			11	1	20	my $val = shift();
146
147	11	100				28	if (!defined($val)) {
148	2					6	return 0;
149							}
150
151							# If there are ANY non-space characters in it
152	9	100				32	if ($val =~ m\|\S\|s) {
153	5					14	return 1;
154							}
155
156	4					11	return 0;
157							}
158
159							sub nocontent {
160	6			6	1	12	my $str = shift();
161
162							# nocontent() is just the inverse to hascontent()
163	6					13	my $ret = !(hascontent($str));
164
165	6					24	return $ret;
166							}
167
168							#
169							# hascontent
170							#------------------------------------------------------------------------------
171
172
173							#------------------------------------------------------------------------------
174							# trim
175							#
176
177							=head2 trim($string), ltrim($string), rtrim($string)
178
179							Returns the string with all leading and trailing whitespace removed.
180							Trim on undef returns "".
181
182							$var = trim(" my string "); # "my string"
183
184							ltrim() trims B whitespace only.
185
186							rtrim() trims B whitespace only.
187
188							=cut
189
190							sub trim {
191	5			5	1	13	my $s = shift();
192
193	5	100				14	if (!defined($s)) {
194	1					4	return undef;
195							}
196
197	4					21	$s =~ s/^\s*//u;
198	4					18	$s =~ s/\s*$//u;
199
200	4					17	return $s;
201							}
202							#
203							# trim
204							#------------------------------------------------------------------------------
205
206
207							#------------------------------------------------------------------------------
208							# ltrim, rtrim
209							#
210
211							sub ltrim {
212	2			2	1	4	my $s = shift();
213
214	2	100				9	if (!defined($s)) {
215	1					4	return undef;
216							}
217
218	1					6	$s =~ s/^\s*//u;
219
220	1					5	return $s;
221							}
222
223							sub rtrim {
224	2			2	1	5	my $s = shift();
225
226	2	100				7	if (!defined($s)) {
227	1					4	return undef;
228							}
229
230	1					9	$s =~ s/\s*$//u;
231
232	1					4	return $s;
233							}
234
235							#
236							# ltrim, rtrim
237							#------------------------------------------------------------------------------
238
239
240
241							#------------------------------------------------------------------------------
242							# no_space
243							#
244
245							=head2 nospace($string)
246
247							Removes B whitespace characters from the given string. This includes spaces
248							between words.
249
250							$var = nospace(" Hello World! "); # "HelloWorld!"
251
252							=cut
253
254							sub no_space {
255	0			0	0	0	return nospace(@_);
256							}
257
258							# alias nospace to no_space
259							sub nospace {
260	3			3	1	6	my $val = shift();
261
262	3	100				10	if (defined $val) {
263	2					9	$val =~ s\|\s+\|\|gs;
264							}
265
266	3					15	return $val;
267							}
268
269							#
270							# no_space
271							#------------------------------------------------------------------------------
272
273
274
275							#------------------------------------------------------------------------------
276							# htmlesc
277							#
278
279							=head2 htmlesc($string)
280
281							Formats a string for literal output in HTML. An undefined value is returned as
282							an empty string.
283
284							htmlesc() is very similar to CGI.pm's escapeHTML. However, there are a few
285							differences. htmlesc() changes an undefined value to an empty string, whereas
286							escapeHTML() returns undefs as undefs.
287
288							=cut
289
290							sub htmlesc {
291	2			2	1	5	my ($val) = @_;
292
293	2	100				7	if (defined $val) {
294	1					5	$val =~ s\|\&\|&\|g;
295	1					4	$val =~ s\|\"\|"\|g;
296	1					3	$val =~ s\|\<\|<\|g;
297	1					3	$val =~ s\|\>\|>\|g;
298							} else {
299	1					2	$val = '';
300							}
301
302	2					7	return $val;
303							}
304							#
305							# htmlesc
306							#------------------------------------------------------------------------------
307
308
309							#------------------------------------------------------------------------------
310							# cellfill
311							#
312
313							sub cellfill{
314	0			0	0	0	my ($val) = @_;
315
316	0					0	carp("cellfill() is deprecated and will be removed in future versions");
317
318	0	0				0	if (hascontent($val)) {
319	0					0	$val = htmlesc($val);
320							} else {
321	0					0	$val = ' ';
322							}
323
324	0					0	return $val;
325							}
326							#
327							# cellfill
328							#------------------------------------------------------------------------------
329
330
331							#------------------------------------------------------------------------------
332							# jsquote
333							#
334
335							=head2 jsquote($string)
336
337							Escapes and quotes a string for use in JavaScript. Escapes single quotes and
338							surrounds the string in single quotes. Returns the modified string.
339
340							=cut
341
342							sub jsquote {
343	1			1	1	2	my ($str) = @_;
344
345	1	50				5	if (!defined($str)) {
346	0					0	return undef;
347							}
348
349							# Escape single quotes.
350	1					5	$str =~ s\|'\|\\'\|gs;
351
352							# Break up anything that looks like a closing HTML tag. This modification
353							# is necessary in an HTML web page. It is unnecessary but harmless if the
354							# output is used in a JavaScript document.
355	1					5	$str =~ s\|
356
357							# break up newlines
358	1					3	$str =~ s\|\n\|\\n\|gs;
359
360							# surround in quotes
361	1					3	$str = qq\|'$str'\|;
362
363							# return
364	1					4	return $str;
365							}
366							#
367							# jsquote
368							#------------------------------------------------------------------------------
369
370
371							#------------------------------------------------------------------------------
372							# unquote
373							#
374
375							=head2 unquote($string)
376
377							If the given string starts and ends with quotes, removes them. Recognizes
378							single quotes and double quotes. The value must begin and end with same type
379							of quotes or nothing is done to the value. Undef input results in undef output.
380							Some examples and what they return:
381
382							unquote(q\|'Hendrix'\|); # Hendrix
383							unquote(q\|"Hendrix"\|); # Hendrix
384							unquote(q\|Hendrix\|); # Hendrix
385							unquote(q\|"Hendrix'\|); # "Hendrix'
386							unquote(q\|O'Sullivan\|); # O'Sullivan
387
388							B braces
389
390							If the braces option is true, surrounding braces such as [] and {} are also
391							removed. Some examples:
392
393							unquote(q\|[Janis]\|, braces=>1); # Janis
394							unquote(q\|{Janis}\|, braces=>1); # Janis
395							unquote(q\|(Janis)\|, braces=>1); # Janis
396
397							=cut
398
399							sub unquote {
400	4			4	1	11	my ($val, %opts) = @_;
401
402	4	50				12	if (defined $val) {
403	4	100	66			36	my $found = $val =~ s\|^\`(.*)\`$\|$1\|s or
404							$val =~ s\|^\"(.*)\"$\|$1\|s or
405							$val =~ s\|^\'(.*)\'$\|$1\|s;
406
407	4	50	33			17	if ($opts{'braces'} && ! $found) {
408	0	0	0			0	$val =~ s\|^\[(.*)\]$\|$1\|s or
409							$val =~ s\|^$(.*)$$\|$1\|s or
410							$val =~ s\|^\{(.*)\}$\|$1\|s;
411							}
412							}
413
414	4					38	return $val;
415							}
416							#
417							# unquote
418							#------------------------------------------------------------------------------
419
420
421							#------------------------------------------------------------------------------
422							# define
423							#
424
425							sub define {
426	0			0	0	0	carp("define() is deprecated and may be removed in future version");
427
428	0					0	my ($val) = @_;
429
430							# if overloaded object, get return value and
431							# concatenate with string (which defines it).
432	0	0	0			0	if (ref($val) && overload::Overloaded($val)) {
433	0					0	local $^W = 0;
434	0					0	$val = $val . '';
435							}
436
437	0	0				0	defined($val) or $val = '';
438	0					0	return $val;
439							}
440							#
441							# define
442							#------------------------------------------------------------------------------
443
444
445							#------------------------------------------------------------------------------
446							# repeat
447							#
448
449							=head2 repeat($string, $count)
450
451							Returns the given string repeated the given number of times. The following
452							command outputs "Fred" three times:
453
454							print repeat('Fred', 3), "\n";
455
456							Note that repeat() was created a long time based on a misunderstanding of how
457							the perl operator 'x' works. The following command using 'x' would perform
458							exactly the same as the above command.
459
460							print 'Fred' x 3, "\n";
461
462							Use whichever you prefer.
463
464							=cut
465
466							sub repeat {
467	0			0	1	0	my ($val, $count) = @_;
468	0					0	return ($val x int($count));
469							}
470							#
471							# repeat
472							#------------------------------------------------------------------------------
473
474
475							#------------------------------------------------------------------------------
476							# randword
477							#
478
479							=head2 randword($length, %options)
480
481							Returns a random string of characters. String will not contain any vowels (to
482							avoid distracting dirty words). First argument is the length of the return
483							string. So this code:
484
485							foreach my $idx (1..3) {
486							print randword(4), "\n";
487							}
488
489							would output something like this:
490
491							kBGV
492							NCWB
493							3tHJ
494
495							If the string 'dictionary' is sent instead of an integer, then a word is
496							randomly selected from a dictionary file. By default, the dictionary file
497							is assumed to be at /usr/share/dict/words and the shuf command is used to
498							pull out a word. The hash %String::Util::PATHS sets the paths to the
499							dictionary file and the shuf executable. Modify that hash to change the paths.
500							So this code:
501
502							foreach my $idx (1..3) {
503							print randword('dictionary'), "\n";
504							}
505
506							would output something like this:
507
508							mustache
509							fronds
510							browning
511
512							B alpha
513
514							If the alpha option is true, only alphabetic characters are returned, no
515							numerals. For example, this code:
516
517							foreach my $idx (1..3) {
518							print randword(4, alpha=>1), "\n";
519							}
520
521							would output something like this:
522
523							qrML
524							wmWf
525							QGvF
526
527							B numerals
528
529							If the numerals option is true, only numerals are returned, no alphabetic
530							characters. So this code:
531
532							foreach my $idx (1..3) {
533							print randword(4, numerals=>1), "\n";
534							}
535
536							would output something like this:
537
538							3981
539							4734
540							2657
541
542							B strip_vowels
543
544							This option is true by default. If true, vowels are not included in the
545							returned random string. So this code:
546
547							foreach my $idx (1..3) {
548							print randword(4, strip_vowels=>1), "\n";
549							}
550
551							would output something like this:
552
553							Sk3v
554							pV5z
555							XhSX
556
557							=cut
558
559							# path information for WC
560							our %PATHS = (
561							wc => '/usr/bin/wc',
562							shuf => '/usr/bin/shuf',
563							words => '/usr/share/dict/words',
564							head => '/usr/bin/head',
565							tail => '/usr/bin/tail',
566							);
567
568							sub randword {
569	1			1	1	4	my ($count, %opts) = @_;
570	1					3	my ($rv, $char, @chars, $paths);
571	1					2	$rv = '';
572
573							# check syntax
574	1	50				4	defined($count) or croak 'syntax: randword($count)';
575
576							# dictionary word
577	1	50				5	if ($count =~ m\|^dict\|si) {
578							# loop until acceptable word is found
579							DICTIONARY:
580	0					0	while (1) {
581	0					0	my ($cmd, $line_count, $line_num, $word);
582
583							# use Encode module
584	0					0	require Encode;
585
586							# get line count
587	0					0	$cmd = qq\|$PATHS{'wc'} -l "$PATHS{'words'}"\|;
588	0					0	($line_count) = `$cmd`;
589	0					0	$line_count =~ s\|\s.*\|\|s;
590
591							# get random line
592	0					0	$line_num = rand($line_count);
593
594							# untaint line number
595	0	0				0	unless ($line_num =~ m\|^([0-9]+)$\|s) { die "invalid line number: $line_num" }
	0					0
596	0					0	$line_num = $1;
597
598							# get random word
599	0					0	$cmd = qq[$PATHS{'head'} -$line_num "$PATHS{'words'}" \| $PATHS{'tail'} -1];
600	0					0	($word) = `$cmd`;
601	0					0	$word =~ s\|\s.*\|\|si;
602	0					0	$word =~ s\|'.*\|\|si;
603	0					0	$word = lc($word);
604
605							# only allow words that are all letters
606	0	0				0	if ($opts{'letters_only'}) {
607	0	0				0	unless ($word =~ m\|^[a-z]+$\|s)
608	0					0	{ next DICTIONARY }
609							}
610
611							# check for max length
612	0	0				0	if ($opts{'maxlength'}) {
613	0	0				0	if ( length($word) > $opts{'maxlength'} )
614	0					0	{ next DICTIONARY }
615							}
616
617							# encode unless specifically opted not to do so
618	0	0	0			0	unless ( defined($opts{'encode'}) && (! $opts{'encode'}) )
619	0					0	{ $word = Encode::encode_utf8($word) }
620
621							# return
622	0					0	return $word;
623							}
624							}
625
626							# alpha only
627	1	50				4	if ($opts{'alpha'})
628	0					0	{ @chars = ('a' .. 'z', 'A' .. 'Z') }
629
630							# else alpha and numeral
631							else
632	1					14	{ @chars = ('a' .. 'z', 'A' .. 'Z', '0' .. '9') }
633
634							# defaults
635	1	50				6	defined($opts{'strip_vowels'}) or $opts{'strip_vowels'} = 1;
636
637	1					4	while (length($rv) < $count) {
638	23					67	$char = rand();
639
640							# numerals only
641	23	50				42	if ($opts{'numerals'}) {
642	0					0	$char =~ s\|^0.\|\|;
643	0					0	$char =~ s\|\D\|\|g;
644							}
645
646							# character random word
647							else {
648	23					37	$char = int( $char * ($#chars + 1) );
649	23					37	$char = $chars[$char];
650	23	100	66			73	next if($opts{'strip_vowels'} && $char =~ m/[aeiouy]/i);
651							}
652
653	20					35	$rv .= $char;
654							}
655
656	1					9	return substr($rv, 0, $count);
657							}
658							#
659							# randword
660							#------------------------------------------------------------------------------
661
662
663
664							#------------------------------------------------------------------------------
665							# eqq
666							# formerly equndef
667							#
668
669							=head2 eqq($scalar1, $scalar2)
670
671							Returns true if the two given values are equal. Also returns true if both
672							are undef. If only one is undef, or if they are both defined but different,
673							returns false. Here are some examples and what they return.
674
675							$var = eqq('x', 'x'); # True
676							$var = eqq('x', undef); # False
677							$var = eqq(undef, undef); # True
678
679							B equndef() is an alias to this function. It is considered deprecated.
680							It may be removed in future versions.
681
682							=cut
683
684							# alias equndef to eqq
685	4			4	0	12	sub equndef { return eqq(@_) }
686
687							sub eqq {
688	8			8	1	17	my ($str1, $str2) = @_;
689
690							# if both defined
691	8	100	100			33	if ( defined($str1) && defined($str2) ) {
692	4					22	return $str1 eq $str2
693							}
694
695							# if neither are defined
696	4	50	66			15	if ( (!defined($str1)) && (!defined($str2)) ) {
697	2					9	return 1
698							}
699
700							# only one is defined, so return false
701	2					14	return 0;
702							}
703							#
704							# eqq
705							#------------------------------------------------------------------------------
706
707
708							#------------------------------------------------------------------------------
709							# neqq
710							# formerly neundef
711							#
712
713							=head2 neqq($scalar1, $scalar2)
714
715							The opposite of neqq, returns true if the two values are not the same.
716							Here are some examples and what they return.
717
718							$var = neqq('x', 'x'); # False
719							$var = neqq('x', undef); # True
720							$var = neqq(undef, undef); # False
721
722							B neundef() is an alias to this function. It is considered deprecated.
723							It may be removed in future versions.
724
725							=cut
726
727							sub neundef {
728	4	100		4	0	9	return eqq(@_) ? 0 : 1;
729							}
730
731							sub neqq {
732	0	0		0	1	0	return eqq(@_) ? 0 : 1;
733							}
734							#
735							# neqq
736							#------------------------------------------------------------------------------
737
738
739							#------------------------------------------------------------------------------
740							# fullchomp
741							#
742
743							sub fullchomp {
744	0			0	0	0	carp("fullchomp() is deprecated and may be removed in future versions");
745
746	0					0	my ($line) = @_;
747	0	0				0	defined($line) and $line =~ s\|[\r\n]+$\|\|s;
748	0	0				0	defined(wantarray) and return $line;
749	0					0	$_[0] = $line;
750							}
751							#
752							# fullchomp
753							#------------------------------------------------------------------------------
754
755
756							#------------------------------------------------------------------------------
757							# randcrypt
758							#
759
760							sub randcrypt {
761	0			0	0	0	carp("randcrypt() is deprecated and may be removed in future versions");
762
763	0					0	my $pw = shift();
764	0					0	my $ret;
765	0					0	$ret = crypt($pw, randword(2));
766
767	0					0	return $ret;
768							}
769							#
770							# randcrypt
771							#------------------------------------------------------------------------------
772
773
774							#------------------------------------------------------------------------------
775							# ords
776							#
777
778							=head2 ords($string)
779
780							Returns the given string represented as the ascii value of each character.
781
782							$var = ords('Hendrix'); # {72}{101}{110}{100}{114}{105}{120}
783
784							B
785
786							=over 4
787
788							=item * convert_spaces=>[true\|false]
789
790							If convert_spaces is true (which is the default) then spaces are converted to
791							their matching ord values. So, for example, this code:
792
793							$var = ords('a b', convert_spaces=>1); # {97}{32}{98}
794
795							This code returns the same thing:
796
797							$var = ords('a b'); # {97}{32}{98}
798
799							If convert_spaces is false, then spaces are just returned as spaces. So this
800							code:
801
802							ords('a b', convert_spaces=>0); # {97} {98}
803
804
805							=item * alpha_nums
806
807							If the alpha_nums option is false, then characters 0-9, a-z, and A-Z are not
808							converted. For example, this code:
809
810							$var = ords('a=b', alpha_nums=>0); # a{61}b
811
812							=back
813
814							=cut
815
816							sub ords {
817	0			0	1	0	my ($str, %opts) = @_;
818	0					0	my (@rv, $show_chars);
819
820							# default options
821	0					0	%opts = (convert_spaces=>1, alpha_nums=>1, %opts);
822
823							# get $show_chars option
824	0					0	$show_chars = $opts{'show_chars'};
825
826							# split into individual characters
827	0					0	@rv = split '', $str;
828
829							# change elements to their unicode numbers
830							CHAR_LOOP:
831	0					0	foreach my $char (@rv) {
832							# don't convert space if called so
833	0	0	0			0	if ( (! $opts{'convert_spaces'}) && ($char =~ m\|^\s$\|s) )
834	0					0	{ next CHAR_LOOP }
835
836							# don't convert alphanums
837	0	0				0	if (! $opts{'alpha_nums'}) {
838	0	0				0	if ( $char =~ m\|^[a-z0-9]$\|si) {
839	0					0	next CHAR_LOOP;
840							}
841							}
842
843	0					0	my $rv = '{';
844
845	0	0				0	if ($show_chars)
846	0					0	{ $rv .= $char . ':' }
847
848	0					0	$rv .= ord($char) . '}';
849
850	0					0	$char = $rv;
851							}
852
853							# return words separated by spaces
854	0					0	return join('', @rv);
855							}
856							#
857							# ords
858							#------------------------------------------------------------------------------
859
860
861							#------------------------------------------------------------------------------
862							# deords
863							#
864
865							=head2 deords($string)
866
867							Takes the output from ords() and returns the string that original created that
868							output.
869
870							$var = deords('{72}{101}{110}{100}{114}{105}{120}'); # 'Hendrix'
871
872							=cut
873
874							sub deords {
875	0			0	1	0	my ($str) = @_;
876	0					0	my (@tokens, $rv);
877	0					0	$rv = '';
878
879							# get tokens
880	0					0	@tokens = split(m\|[\{\}]\|s, $str);
881	0					0	@tokens = grep {length($_)} @tokens;
	0					0
882
883							# build return string
884	0					0	foreach my $token (@tokens) {
885	0					0	$rv .= chr($token);
886							}
887
888							# return
889	0					0	return $rv;
890							}
891							#
892							# deords
893							#------------------------------------------------------------------------------
894
895							=head2 contains($string, $substring)
896
897							Checks if the string contains substring
898
899							$var = contains("Hello world", "Hello"); # true
900							$var = contains("Hello world", "llo wor"); # true
901							$var = contains("Hello world", "QQQ"); # false
902
903							# Also works with grep
904							@arr = grep { contains("cat") } @input;
905
906							=cut
907
908							sub contains {
909	5			5	1	11	my ($str, $substr) = @_;
910
911	5	100				13	if (!defined($str)) {
912	1					5	return undef;
913							}
914
915	4	50				9	if (!$substr) {
916	0					0	$substr = $str;
917	0					0	$str = $_;
918							}
919
920	4					11	my $ret = index($str, $substr, 0) != -1;
921
922	4					27	return $ret;
923							}
924
925							=head2 startswith($string, $substring)
926
927							Checks if the string starts with the characters in substring
928
929							$var = startwith("Hello world", "Hello"); # true
930							$var = startwith("Hello world", "H"); # true
931							$var = startwith("Hello world", "Q"); # false
932
933							# Also works with grep
934							@arr = grep { startswith("X") } @input;
935
936							=cut
937
938							sub startswith {
939	5			5	1	15	my ($str, $substr) = @_;
940
941	5	100				11	if (!defined($str)) {
942	1					4	return undef;
943							}
944
945	4	50				8	if (!$substr) {
946	0					0	$substr = $str;
947	0					0	$str = $_;
948							}
949
950	4					9	my $ret = index($str, $substr, 0) == 0;
951
952	4					17	return $ret;
953							}
954
955							=head2 endswith($string, $substring)
956
957							Checks if the string ends with the characters in substring
958
959							$var = endswith("Hello world", "world"); # true
960							$var = endswith("Hello world", "d"); # true
961							$var = endswith("Hello world", "QQQ"); # false
962
963							# Also works with grep
964							@arr = grep { endswith("z") } @input;
965
966							=cut
967
968							sub endswith {
969	5			5	1	14	my ($str, $substr) = @_;
970
971	5	100				12	if (!defined($str)) {
972	1					4	return undef;
973							}
974
975	4	50				9	if (!$substr) {
976	0					0	$substr = $str;
977	0					0	$str = $_;
978							}
979
980	4					6	my $len = length($substr);
981	4					7	my $start = length($str) - $len;
982
983	4					10	my $ret = index($str, $substr, $start) != -1;
984
985	4					84	return $ret;
986							}
987
988							#------------------------------------------------------------------------------
989							# crunchlines
990							#
991
992							=head2 crunchlines($string)
993
994							Compacts contiguous newlines into single newlines. Whitespace between newlines
995							is ignored, so that two newlines separated by whitespace is compacted down to a
996							single newline.
997
998							$var = crunchlines("x\n\n\nx"); # "x\nx";
999
1000							=cut
1001
1002							sub crunchlines {
1003	3			3	1	10	my ($str) = @_;
1004
1005	3	100				9	if (!defined($str)) {
1006	1					5	return undef;
1007							}
1008
1009	2					17	while($str =~ s\|\n[ \t]*\n\|\n\|gs)
1010							{}
1011
1012	2					26	$str =~ s\|^\n\|\|s;
1013	2					6	$str =~ s\|\n$\|\|s;
1014
1015	2					10	return $str;
1016							}
1017							#
1018							# crunchlines
1019							#------------------------------------------------------------------------------
1020
1021							=head2 sanitize($string, $separator = "_")
1022
1023							Sanitize all non alpha-numeric characters in a string to underscores.
1024							This is useful to take a URL, or filename, or text description and know
1025							you can use it safely in a URL or a filename.
1026
1027							B This will remove any trailing or leading '_' on the string
1028
1029							$var = sanitize("http://www.google.com/") # http_www_google_com
1030							$var = sanitize("foo_bar()"; # foo_bar
1031							$var = sanitize("/path/to/file.txt"); # path_to_file_txt
1032							$var = sanitize("Big yellow bird!", "."); # Big.yellow.bird
1033
1034							=cut
1035
1036							sub sanitize {
1037	4			4	1	10	my $str = shift();
1038	4		100			15	my $sep = shift() // "_";
1039
1040	4	50				12	if (!defined($str)) {
1041	0					0	return undef;
1042							}
1043
1044							# Convert multiple non-word sequences to the separator
1045	4					29	$str =~ s/[\W_]+/$sep/g;
1046
1047							# The separator is a literal character so we quotemeta it
1048	4					9	$sep = quotemeta($sep);
1049							# Remove any separators at the beginning and end
1050	4					39	$str =~ s/\A$sep+//;
1051	4					33	$str =~ s/$sep+\z//;
1052
1053	4					21	return $str;
1054							}
1055
1056							###########################################################################
1057
1058							=head2 file_get_contents($string, $boolean)
1059
1060							Read an entire file from disk into a string. Returns undef if the file
1061							cannot be read for any reason. Can also return the file as an array of
1062							lines.
1063
1064							$str = file_get_contents("/tmp/file.txt"); # Return a string
1065							@lines = file_get_contents("/tmp/file.txt", 1); # Return an array
1066
1067							=cut
1068
1069							sub file_get_contents {
1070	2			2	1	300	my ($file, $ret_array) = @_;
1071
1072	2	50				85	open (my $fh, "<", $file) or return undef;
1073
1074	2					5	my $ret;
1075	2					66	while (<$fh>) {
1076	488					940	$ret .= $_;
1077							}
1078
1079	2	100				9	if ($ret_array) {
1080	1					192	return split(/\r?\n/,$ret);
1081							}
1082
1083	1					23	return $ret;
1084							}
1085
1086							# return true
1087							1;
1088
1089
1090							__END__