File Coverage

blib/lib/String/Util.pm

Criterion	Covered	Total	%
statement	123	218	56.4
branch	36	84	42.8
condition	17	42	40.4
subroutine	25	36	69.4
pod	22	30	73.3
total	223	410	54.3

line	stmt	bran	cond	sub	pod	time	code
1							package String::Util;
2
3	1			1		547	use strict;
	1					7
	1					28
4	1			1		5	use warnings;
	1					1
	1					24
5	1			1		5	use Carp;
	1					2
	1					91
6	1			1		28	use 5.010;
	1					3
7
8							# version
9							our $VERSION = '1.31';
10
11
12							#------------------------------------------------------------------------------
13							# opening POD
14							#
15
16							=head1 NAME
17
18							String::Util -- String processing utility functions
19
20							=head1 DESCRIPTION
21
22							String::Util provides a collection of small, handy functions for processing
23							strings in various ways.
24
25							=head1 INSTALLATION
26
27							cpanm String::Util
28
29							=head1 USAGE
30
31							No functions are exported by default, they must be specified:
32
33							use String::Util qw(trim eqq contains)
34
35							alternately you can use C<:all> to export B of the functions
36
37							use String::Util qw(:all)
38
39							=head1 FUNCTIONS
40
41							=cut
42
43							#
44							# opening POD
45							#------------------------------------------------------------------------------
46
47
48
49							#------------------------------------------------------------------------------
50							# export
51							#
52	1			1		6	use base 'Exporter';
	1					2
	1					86
53	1			1		7	use vars qw[@EXPORT_OK %EXPORT_TAGS];
	1					2
	1					3206
54
55							# the following functions accept a value and return a modified version of
56							# that value
57							push @EXPORT_OK, qw[
58							collapse crunch htmlesc trim ltrim
59							rtrim define repeat unquote no_space
60							nospace fullchomp randcrypt jsquote cellfill
61							crunchlines file_get_contents
62							];
63
64							# the following functions return true or false based on their input
65							push @EXPORT_OK, qw[
66							hascontent nocontent eqq equndef neqq neundef
67							startswith endswith contains sanitize
68							];
69
70							# the following function returns a random string of some type
71							push @EXPORT_OK, qw[ randword ];
72
73							# the following function returns the unicode values of a string
74							push @EXPORT_OK, qw[ ords deords ];
75
76							%EXPORT_TAGS = ('all' => [@EXPORT_OK]);
77							#
78							# export
79							#------------------------------------------------------------------------------
80
81
82							#------------------------------------------------------------------------------
83							# collapse
84							#
85
86							=head2 collapse($string)
87
88							C collapses all whitespace in the string down to single spaces.
89							Also removes all leading and trailing whitespace. Undefined input results in
90							undefined output.
91
92							B C is an alias to this function. It is considered deprecated.
93							It may be removed in future versions.
94
95							$var = collapse(" Hello world! "); # "Hello world!"
96
97							=cut
98
99							sub collapse {
100	2			2	1	90	my ($val) = @_;
101
102	2	100				7	if (defined $val) {
103	1					5	$val =~ s\|^\s+\|\|s;
104	1					6	$val =~ s\|\s+$\|\|s;
105	1					5	$val =~ s\|\s+\| \|sg;
106							}
107
108	2					13	return $val;
109							}
110
111							sub crunch {
112	0			0	0	0	return collapse(@_);
113							}
114							#
115							# collapse
116							#------------------------------------------------------------------------------
117
118
119							#------------------------------------------------------------------------------
120							# hascontent
121							#
122
123							=head2 hascontent($scalar), nocontent($scalar)
124
125							hascontent() returns true if the given argument is defined and contains
126							something besides whitespace.
127
128							An undefined value returns false. An empty string returns false. A value
129							containing nothing but whitespace (spaces, tabs, carriage returns, newlines,
130							backspace) returns false. A string containing any other characters (including
131							zero) returns true.
132
133							C returns the negation of C.
134
135							$var = hascontent(""); # False
136							$var = hascontent(" "); # False
137							$var = hascontent("a"); # True
138
139							$var = nocontent(""); # True
140							$var = nocontent("a"); # False
141
142							=cut
143
144							sub hascontent {
145	11			11	1	22	my $val = shift();
146
147	11	100				25	if (!defined($val)) {
148	2					46	return 0;
149							}
150
151							# If there are ANY non-space characters in it
152	9	100				37	if ($val =~ m\|\S\|s) {
153	5					14	return 1;
154							}
155
156	4					12	return 0;
157							}
158
159							sub nocontent {
160	6			6	1	12	my $str = shift();
161
162							# nocontent() is just the inverse to hascontent()
163	6					13	my $ret = !(hascontent($str));
164
165	6					23	return $ret;
166							}
167
168							#
169							# hascontent
170							#------------------------------------------------------------------------------
171
172
173							#------------------------------------------------------------------------------
174							# trim
175							#
176
177							=head2 trim($string), ltrim($string), rtrim($string)
178
179							Returns the string with all leading and trailing whitespace removed.
180							Trim on undef returns "".
181
182							$var = trim(" my string "); # "my string"
183
184							ltrim() trims B whitespace only.
185
186							rtrim() trims B whitespace only.
187
188							=cut
189
190							sub trim {
191	5			5	1	12	my $s = shift();
192
193	5	100	66			37	if (!defined($s) \|\| length($s) == 0) {
194	1					7	return "";
195							}
196
197	4					22	$s =~ s/^\s*//u;
198	4					19	$s =~ s/\s*$//u;
199
200	4					18	return $s;
201							}
202							#
203							# trim
204							#------------------------------------------------------------------------------
205
206
207							#------------------------------------------------------------------------------
208							# ltrim, rtrim
209							#
210
211							sub ltrim {
212	1			1	1	3	my $s = shift();
213
214	1	50	33			10	if (!defined($s) \|\| length($s) == 0) {
215	0					0	return "";
216							}
217
218	1					6	$s =~ s/^\s*//u;
219
220	1					4	return $s;
221							}
222
223							sub rtrim {
224	1			1	1	3	my $s = shift();
225
226	1	50	33			8	if (!defined($s) \|\| length($s) == 0) {
227	0					0	return "";
228							}
229
230	1					8	$s =~ s/\s*$//u;
231
232	1					4	return $s;
233							}
234
235							#
236							# ltrim, rtrim
237							#------------------------------------------------------------------------------
238
239
240
241							#------------------------------------------------------------------------------
242							# no_space
243							#
244
245							=head2 nospace($string)
246
247							Removes B whitespace characters from the given string. This includes spaces
248							between words.
249
250							$var = nospace(" Hello World! "); # "HelloWorld!"
251
252							=cut
253
254							sub no_space {
255	0			0	0	0	return nospace(@_);
256							}
257
258							# alias nospace to no_space
259							sub nospace {
260	3			3	1	7	my $val = shift();
261
262	3	100				9	if (defined $val) {
263	2					10	$val =~ s\|\s+\|\|gs;
264							}
265
266	3					13	return $val;
267							}
268
269							#
270							# no_space
271							#------------------------------------------------------------------------------
272
273
274
275							#------------------------------------------------------------------------------
276							# htmlesc
277							#
278
279							=head2 htmlesc($string)
280
281							Formats a string for literal output in HTML. An undefined value is returned as
282							an empty string.
283
284							htmlesc() is very similar to CGI.pm's escapeHTML. However, there are a few
285							differences. htmlesc() changes an undefined value to an empty string, whereas
286							escapeHTML() returns undefs as undefs.
287
288							=cut
289
290							sub htmlesc {
291	2			2	1	6	my ($val) = @_;
292
293	2	100				6	if (defined $val) {
294	1					5	$val =~ s\|\&\|&\|g;
295	1					4	$val =~ s\|\"\|"\|g;
296	1					3	$val =~ s\|\<\|<\|g;
297	1					3	$val =~ s\|\>\|>\|g;
298							}
299							else {
300	1					2	$val = '';
301							}
302
303	2					8	return $val;
304							}
305							#
306							# htmlesc
307							#------------------------------------------------------------------------------
308
309
310							#------------------------------------------------------------------------------
311							# cellfill
312							#
313
314							sub cellfill{
315	0			0	0	0	my ($val) = @_;
316
317	0					0	carp("cellfill() is deprecated and will be removed in future versions");
318
319	0	0				0	if (hascontent($val)) {
320	0					0	$val = htmlesc($val);
321							}
322
323							else {
324	0					0	$val = ' ';
325							}
326
327	0					0	return $val;
328							}
329							#
330							# cellfill
331							#------------------------------------------------------------------------------
332
333
334							#------------------------------------------------------------------------------
335							# jsquote
336							#
337
338							=head2 jsquote($string)
339
340							Escapes and quotes a string for use in JavaScript. Escapes single quotes and
341							surrounds the string in single quotes. Returns the modified string.
342
343							=cut
344
345							sub jsquote {
346	1			1	1	3	my ($str) = @_;
347
348							# Escape single quotes.
349	1					7	$str =~ s\|'\|\\'\|gs;
350
351							# Break up anything that looks like a closing HTML tag. This modification
352							# is necessary in an HTML web page. It is unnecessary but harmless if the
353							# output is used in a JavaScript document.
354	1					4	$str =~ s\|
355
356							# break up newlines
357	1					5	$str =~ s\|\n\|\\n\|gs;
358
359							# surround in quotes
360	1					4	$str = qq\|'$str'\|;
361
362							# return
363	1					4	return $str;
364							}
365							#
366							# jsquote
367							#------------------------------------------------------------------------------
368
369
370							#------------------------------------------------------------------------------
371							# unquote
372							#
373
374							=head2 unquote($string)
375
376							If the given string starts and ends with quotes, removes them. Recognizes
377							single quotes and double quotes. The value must begin and end with same type
378							of quotes or nothing is done to the value. Undef input results in undef output.
379							Some examples and what they return:
380
381							unquote(q\|'Hendrix'\|); # Hendrix
382							unquote(q\|"Hendrix"\|); # Hendrix
383							unquote(q\|Hendrix\|); # Hendrix
384							unquote(q\|"Hendrix'\|); # "Hendrix'
385							unquote(q\|O'Sullivan\|); # O'Sullivan
386
387							B braces
388
389							If the braces option is true, surrounding braces such as [] and {} are also
390							removed. Some examples:
391
392							unquote(q\|[Janis]\|, braces=>1); # Janis
393							unquote(q\|{Janis}\|, braces=>1); # Janis
394							unquote(q\|(Janis)\|, braces=>1); # Janis
395
396							=cut
397
398							sub unquote {
399	4			4	1	12	my ($val, %opts) = @_;
400
401	4	50				12	if (defined $val) {
402	4	100	66			38	my $found = $val =~ s\|^\`(.*)\`$\|$1\|s or
403							$val =~ s\|^\"(.*)\"$\|$1\|s or
404							$val =~ s\|^\'(.*)\'$\|$1\|s;
405
406	4	50	33			15	if ($opts{'braces'} && ! $found) {
407	0	0	0			0	$val =~ s\|^\[(.*)\]$\|$1\|s or
408							$val =~ s\|^$(.*)$$\|$1\|s or
409							$val =~ s\|^\{(.*)\}$\|$1\|s;
410							}
411							}
412
413	4					18	return $val;
414							}
415							#
416							# unquote
417							#------------------------------------------------------------------------------
418
419
420							#------------------------------------------------------------------------------
421							# define
422							#
423
424							sub define {
425	0			0	0	0	carp("define() is deprecated and may be removed in future version");
426
427	0					0	my ($val) = @_;
428
429							# if overloaded object, get return value and
430							# concatenate with string (which defines it).
431	0	0	0			0	if (ref($val) && overload::Overloaded($val)) {
432	0					0	local $^W = 0;
433	0					0	$val = $val . '';
434							}
435
436	0	0				0	defined($val) or $val = '';
437	0					0	return $val;
438							}
439							#
440							# define
441							#------------------------------------------------------------------------------
442
443
444							#------------------------------------------------------------------------------
445							# repeat
446							#
447
448							=head2 repeat($string, $count)
449
450							Returns the given string repeated the given number of times. The following
451							command outputs "Fred" three times:
452
453							print repeat('Fred', 3), "\n";
454
455							Note that repeat() was created a long time based on a misunderstanding of how
456							the perl operator 'x' works. The following command using 'x' would perform
457							exactly the same as the above command.
458
459							print 'Fred' x 3, "\n";
460
461							Use whichever you prefer.
462
463							=cut
464
465							sub repeat {
466	0			0	1	0	my ($val, $count) = @_;
467	0					0	return ($val x int($count));
468							}
469							#
470							# repeat
471							#------------------------------------------------------------------------------
472
473
474							#------------------------------------------------------------------------------
475							# randword
476							#
477
478							=head2 randword($length, %options)
479
480							Returns a random string of characters. String will not contain any vowels (to
481							avoid distracting dirty words). First argument is the length of the return
482							string. So this code:
483
484							foreach my $idx (1..3) {
485							print randword(4), "\n";
486							}
487
488							would output something like this:
489
490							kBGV
491							NCWB
492							3tHJ
493
494							If the string 'dictionary' is sent instead of an integer, then a word is
495							randomly selected from a dictionary file. By default, the dictionary file
496							is assumed to be at /usr/share/dict/words and the shuf command is used to
497							pull out a word. The hash %String::Util::PATHS sets the paths to the
498							dictionary file and the shuf executable. Modify that hash to change the paths.
499							So this code:
500
501							foreach my $idx (1..3) {
502							print randword('dictionary'), "\n";
503							}
504
505							would output something like this:
506
507							mustache
508							fronds
509							browning
510
511							B alpha
512
513							If the alpha option is true, only alphabetic characters are returned, no
514							numerals. For example, this code:
515
516							foreach my $idx (1..3) {
517							print randword(4, alpha=>1), "\n";
518							}
519
520							would output something like this:
521
522							qrML
523							wmWf
524							QGvF
525
526							B numerals
527
528							If the numerals option is true, only numerals are returned, no alphabetic
529							characters. So this code:
530
531							foreach my $idx (1..3) {
532							print randword(4, numerals=>1), "\n";
533							}
534
535							would output something like this:
536
537							3981
538							4734
539							2657
540
541							B strip_vowels
542
543							This option is true by default. If true, vowels are not included in the
544							returned random string. So this code:
545
546							foreach my $idx (1..3) {
547							print randword(4, strip_vowels=>1), "\n";
548							}
549
550							would output something like this:
551
552							Sk3v
553							pV5z
554							XhSX
555
556							=cut
557
558							# path information for WC
559							our %PATHS = (
560							wc => '/usr/bin/wc',
561							shuf => '/usr/bin/shuf',
562							words => '/usr/share/dict/words',
563							head => '/usr/bin/head',
564							tail => '/usr/bin/tail',
565							);
566
567							sub randword {
568	1			1	1	21	my ($count, %opts) = @_;
569	1					20	my ($rv, $char, @chars, $paths);
570	1					5	$rv = '';
571
572							# check syntax
573	1	50				5	defined($count) or croak 'syntax: randword($count)';
574
575							# dictionary word
576	1	50				4	if ($count =~ m\|^dict\|si) {
577							# loop until acceptable word is found
578							DICTIONARY:
579	0					0	while (1) {
580	0					0	my ($cmd, $line_count, $line_num, $word);
581
582							# use Encode module
583	0					0	require Encode;
584
585							# get line count
586	0					0	$cmd = qq\|$PATHS{'wc'} -l "$PATHS{'words'}"\|;
587	0					0	($line_count) = `$cmd`;
588	0					0	$line_count =~ s\|\s.*\|\|s;
589
590							# get random line
591	0					0	$line_num = rand($line_count);
592
593							# untaint line number
594	0	0				0	unless ($line_num =~ m\|^([0-9]+)$\|s) { die "invalid line number: $line_num" }
	0					0
595	0					0	$line_num = $1;
596
597							# get random word
598	0					0	$cmd = qq[$PATHS{'head'} -$line_num "$PATHS{'words'}" \| $PATHS{'tail'} -1];
599	0					0	($word) = `$cmd`;
600	0					0	$word =~ s\|\s.*\|\|si;
601	0					0	$word =~ s\|'.*\|\|si;
602	0					0	$word = lc($word);
603
604							# only allow words that are all letters
605	0	0				0	if ($opts{'letters_only'}) {
606	0	0				0	unless ($word =~ m\|^[a-z]+$\|s)
607	0					0	{ next DICTIONARY }
608							}
609
610							# check for max length
611	0	0				0	if ($opts{'maxlength'}) {
612	0	0				0	if ( length($word) > $opts{'maxlength'} )
613	0					0	{ next DICTIONARY }
614							}
615
616							# encode unless specifically opted not to do so
617	0	0	0			0	unless ( defined($opts{'encode'}) && (! $opts{'encode'}) )
618	0					0	{ $word = Encode::encode_utf8($word) }
619
620							# return
621	0					0	return $word;
622							}
623							}
624
625							# alpha only
626	1	50				5	if ($opts{'alpha'})
627	0					0	{ @chars = ('a' .. 'z', 'A' .. 'Z') }
628
629							# else alpha and numeral
630							else
631	1					12	{ @chars = ('a' .. 'z', 'A' .. 'Z', '0' .. '9') }
632
633							# defaults
634	1	50				5	defined($opts{'strip_vowels'}) or $opts{'strip_vowels'} = 1;
635
636	1					4	while (length($rv) < $count) {
637	23					78	$char = rand();
638
639							# numerals only
640	23	50				37	if ($opts{'numerals'}) {
641	0					0	$char =~ s\|^0.\|\|;
642	0					0	$char =~ s\|\D\|\|g;
643							}
644
645							# character random word
646							else {
647	23					45	$char = int( $char * ($#chars + 1) );
648	23					34	$char = $chars[$char];
649	23	100	66			77	next if($opts{'strip_vowels'} && $char =~ m/[aeiouy]/i);
650							}
651
652	20					38	$rv .= $char;
653							}
654
655	1					9	return substr($rv, 0, $count);
656							}
657							#
658							# randword
659							#------------------------------------------------------------------------------
660
661
662
663							#------------------------------------------------------------------------------
664							# eqq
665							# formerly equndef
666							#
667
668							=head2 eqq($scalar1, $scalar2)
669
670							Returns true if the two given values are equal. Also returns true if both
671							are undef. If only one is undef, or if they are both defined but different,
672							returns false. Here are some examples and what they return.
673
674							$var = eqq('x', 'x'), "\n"; # True
675							$var = eqq('x', undef), "\n"; # False
676							$var = eqq(undef, undef), "\n"; # True
677
678							B equndef() is an alias to this function. It is considered deprecated.
679							It may be removed in future versions.
680
681							=cut
682
683							# alias equndef to eqq
684	4			4	0	12	sub equndef { return eqq(@_) }
685
686							sub eqq {
687	8			8	1	16	my ($str1, $str2) = @_;
688
689							# if both defined
690	8	100	100			35	if ( defined($str1) && defined($str2) ) {
691	4					20	return $str1 eq $str2
692							}
693
694							# if neither are defined
695	4	50	66			15	if ( (!defined($str1)) && (!defined($str2)) ) {
696	2					10	return 1
697							}
698
699							# only one is defined, so return false
700	2					9	return 0;
701							}
702							#
703							# eqq
704							#------------------------------------------------------------------------------
705
706
707							#------------------------------------------------------------------------------
708							# neqq
709							# formerly neundef
710							#
711
712							=head2 neqq($scalar1, $scalar2)
713
714							The opposite of neqq, returns true if the two values are not the same.
715							Here are some examples and what they return.
716
717							$var = neqq('x', 'x'), "\n"; # False
718							$var = neqq('x', undef), "\n"; # True
719							$var = neqq(undef, undef), "\n"; # False
720
721							B neundef() is an alias to this function. It is considered deprecated.
722							It may be removed in future versions.
723
724							=cut
725
726							sub neundef {
727	4	100		4	0	13	return eqq(@_) ? 0 : 1;
728							}
729
730							sub neqq {
731	0	0		0	1	0	return eqq(@_) ? 0 : 1;
732							}
733							#
734							# neqq
735							#------------------------------------------------------------------------------
736
737
738							#------------------------------------------------------------------------------
739							# fullchomp
740							#
741
742							sub fullchomp {
743	0			0	0	0	carp("fullchomp() is deprecated and may be removed in future versions");
744
745	0					0	my ($line) = @_;
746	0	0				0	defined($line) and $line =~ s\|[\r\n]+$\|\|s;
747	0	0				0	defined(wantarray) and return $line;
748	0					0	$_[0] = $line;
749							}
750							#
751							# fullchomp
752							#------------------------------------------------------------------------------
753
754
755							#------------------------------------------------------------------------------
756							# randcrypt
757							#
758
759							sub randcrypt {
760	0			0	0	0	carp("randcrypt() is deprecated and may be removed in future versions");
761
762	0					0	my $pw = shift();
763	0					0	my $ret;
764	0					0	$ret = crypt($pw, randword(2));
765
766	0					0	return $ret;
767							}
768							#
769							# randcrypt
770							#------------------------------------------------------------------------------
771
772
773							#------------------------------------------------------------------------------
774							# ords
775							#
776
777							=head2 ords($string)
778
779							Returns the given string represented as the ascii value of each character.
780
781							$var = ords('Hendrix'); # {72}{101}{110}{100}{114}{105}{120}
782
783							B
784
785							=over 4
786
787							=item * convert_spaces=>[true\|false]
788
789							If convert_spaces is true (which is the default) then spaces are converted to
790							their matching ord values. So, for example, this code:
791
792							$var = ords('a b', convert_spaces=>1); # {97}{32}{98}
793
794							This code returns the same thing:
795
796							$var = ords('a b'); # {97}{32}{98}
797
798							If convert_spaces is false, then spaces are just returned as spaces. So this
799							code:
800
801							ords('a b', convert_spaces=>0); # {97} {98}
802
803
804							=item * alpha_nums
805
806							If the alpha_nums option is false, then characters 0-9, a-z, and A-Z are not
807							converted. For example, this code:
808
809							$var = ords('a=b', alpha_nums=>0); # a{61}b
810
811							=back
812
813							=cut
814
815							sub ords {
816	0			0	1	0	my ($str, %opts) = @_;
817	0					0	my (@rv, $show_chars);
818
819							# default options
820	0					0	%opts = (convert_spaces=>1, alpha_nums=>1, %opts);
821
822							# get $show_chars option
823	0					0	$show_chars = $opts{'show_chars'};
824
825							# split into individual characters
826	0					0	@rv = split '', $str;
827
828							# change elements to their unicode numbers
829							CHAR_LOOP:
830	0					0	foreach my $char (@rv) {
831							# don't convert space if called so
832	0	0	0			0	if ( (! $opts{'convert_spaces'}) && ($char =~ m\|^\s$\|s) )
833	0					0	{ next CHAR_LOOP }
834
835							# don't convert alphanums
836	0	0				0	if (! $opts{'alpha_nums'}) {
837	0	0				0	if ( $char =~ m\|^[a-z0-9]$\|si) {
838	0					0	next CHAR_LOOP;
839							}
840							}
841
842	0					0	my $rv = '{';
843
844	0	0				0	if ($show_chars)
845	0					0	{ $rv .= $char . ':' }
846
847	0					0	$rv .= ord($char) . '}';
848
849	0					0	$char = $rv;
850							}
851
852							# return words separated by spaces
853	0					0	return join('', @rv);
854							}
855							#
856							# ords
857							#------------------------------------------------------------------------------
858
859
860							#------------------------------------------------------------------------------
861							# deords
862							#
863
864							=head2 deords($string)
865
866							Takes the output from ords() and returns the string that original created that
867							output.
868
869							$var = deords('{72}{101}{110}{100}{114}{105}{120}'); # 'Hendrix'
870
871							=cut
872
873							sub deords {
874	0			0	1	0	my ($str) = @_;
875	0					0	my (@tokens, $rv);
876	0					0	$rv = '';
877
878							# get tokens
879	0					0	@tokens = split(m\|[\{\}]\|s, $str);
880	0					0	@tokens = grep {length($_)} @tokens;
	0					0
881
882							# build return string
883	0					0	foreach my $token (@tokens) {
884	0					0	$rv .= chr($token);
885							}
886
887							# return
888	0					0	return $rv;
889							}
890							#
891							# deords
892							#------------------------------------------------------------------------------
893
894							=head2 contains($string, $substring)
895
896							Checks if the string contains substring
897
898							$var = contains("Hello world", "Hello"); # true
899							$var = contains("Hello world", "llo wor"); # true
900							$var = contains("Hello world", "QQQ"); # false
901
902							=cut
903
904							sub contains {
905	4		50	4	1	13	my $str = shift() \|\| "";
906	4					7	my $substr = shift();
907
908	4	50				7	if (!$substr) {
909	0					0	$substr = $str;
910	0					0	$str = $_;
911							}
912
913	4					12	my $ret = index($str, $substr, 0) != -1;
914
915	4					26	return $ret;
916							}
917
918							=head2 startswith($string, $substring)
919
920							Checks if the string starts with the characters in substring
921
922							$var = startwidth("Hello world", "Hello"); # true
923							$var = startwidth("Hello world", "H"); # true
924							$var = startwidth("Hello world", "Q"); # false
925
926							=cut
927
928							sub startswith {
929	4		50	4	1	14	my $str = shift() \|\| "";
930	4					6	my $substr = shift();
931
932	4	50				9	if (!$substr) {
933	0					0	$substr = $str;
934	0					0	$str = $_;
935							}
936
937	4					10	my $ret = index($str, $substr, 0) == 0;
938
939	4					16	return $ret;
940							}
941
942							=head2 sanitize($string)
943
944							Sanitize all non alpha-numeric characters in a string to underscores.
945							This is useful to take a URL, or filename, or text description and know
946							you can use it safely in a URL or a filename.
947
948							B This will remove any trailing or leading '_' on the string
949
950							$var = sanitize("http://www.google.com/") # http_www_google_com
951							$var = sanitize("foo_bar()"; # foo_bar
952							$var = sanitize("/path/to/file.txt"); # path_to_file_txt
953
954							=cut
955
956							sub sanitize {
957	3			3	1	8	my $str = shift();
958
959	3					26	$str =~ s/[\W_]+/_/g;
960	3					8	$str =~ s/\A_+//g;
961	3					12	$str =~ s/_+\z//g;
962
963	3					13	return $str;
964							}
965
966							=head2 endswith($string, $substring)
967
968							Checks if the string ends with the characters in substring
969
970							$var = endswidth("Hello world", "world"); # true
971							$var = endswidth("Hello world", "d"); # true
972							$var = endswidth("Hello world", "QQQ"); # false
973
974							=cut
975
976							sub endswith {
977	4		50	4	1	14	my $str = shift() \|\| "";
978	4					6	my $substr = shift();
979
980	4	50				10	if (!$substr) {
981	0					0	$substr = $str;
982	0					0	$str = $_;
983							}
984
985	4					6	my $len = length($substr);
986	4					7	my $start = length($str) - $len;
987
988	4					9	my $ret = index($str, $substr, $start) != -1;
989
990	4					15	return $ret;
991							}
992
993							#------------------------------------------------------------------------------
994							# crunchlines
995							#
996
997							=head2 crunchlines($string)
998
999							Compacts contiguous newlines into single newlines. Whitespace between newlines
1000							is ignored, so that two newlines separated by whitespace is compacted down to a
1001							single newline.
1002
1003							$var = crunchlines("x\n\n\nx"); # "x\nx";
1004
1005							=cut
1006
1007							sub crunchlines {
1008	0			0	1	0	my ($str) = @_;
1009
1010	0					0	while($str =~ s\|\n[ \t]*\n\|\n\|gs)
1011							{}
1012
1013	0					0	$str =~ s\|^\n\|\|s;
1014	0					0	$str =~ s\|\n$\|\|s;
1015
1016	0					0	return $str;
1017							}
1018							#
1019							# crunchlines
1020							#------------------------------------------------------------------------------
1021
1022							=head2 file_get_contents($string, $boolean)
1023
1024							Read an entire file from disk into a string. Returns undef if the file
1025							cannot be read for any reason. Can also return the file as an array of
1026							lines.
1027
1028							$str = file_get_contents("/tmp/file.txt"); # Return a string
1029							@lines = file_get_contents("/tmp/file.txt", 1); # Return an array
1030
1031							=cut
1032
1033							sub file_get_contents {
1034	2			2	1	305	my ($file, $ret_array) = @_;
1035
1036	2	50				92	open (my $fh, "<", $file) or return undef;
1037
1038	2					7	my $ret;
1039	2					64	while (<$fh>) {
1040	468					828	$ret .= $_;
1041							}
1042
1043	2	100				9	if ($ret_array) {
1044	1					183	return split(/\r?\n/,$ret);
1045							}
1046
1047	1					26	return $ret;
1048							}
1049
1050							# return true
1051							1;
1052
1053
1054							__END__