File Coverage

blib/lib/Text/Embed.pm

Criterion	Covered	Total	%
statement	97	114	85.0
branch	27	46	58.7
condition	7	15	46.6
subroutine	16	21	76.1
pod	5	5	100.0
total	152	201	75.6

line	stmt	bran	cond	sub	pod	time	code
1							package Text::Embed;
2
3	1			1		6746	use strict;
	1					2
	1					35
4	1			1		5	use warnings;
	1					1
	1					30
5	1			1		5	use Carp;
	1					5
	1					568
6
7							our $VERSION = '0.03';
8
9							my %modules = ();
10							my %regexen = ();
11							my %callbacks = ();
12							my %handles = ();
13
14							my $rex_proc = undef;
15							my $rex_parse = undef;
16
17							my $NL = '(?:\r?\n)';
18							my $VARS = '\$$(\w+)$';
19
20							#
21							# Default handlers for parsing - see POD
22							#
23
24							my %def_parse =
25							(
26							':underscore' => qr/${NL}__([^_].*[^_])__$NL/,
27							':define' => qr/${NL}#define\s+?(\S+?)(?:$NL\|\s+?$NL\|\s+?)/,
28							':cdata' => sub{$_ = shift or return;
29							return($$_ =~ m#\s?\s#sgo);
30							},
31							);
32
33							$def_parse{':default'} = $def_parse{':underscore'};
34							$rex_parse = join('\|', keys %def_parse);
35
36							#
37							# Default handlers for processing - see POD
38							#
39
40							my %def_proc =
41							(
42							':raw' => undef,
43							':trim' => sub{ trim($_[1]); },
44							':compress' => sub{ compress($_[1]); },
45							':block-indent' => sub{ block($_[1]); },
46							':block-noindent' => sub{ block($_[1],1); },
47
48							':strip-cpp' => sub{strip($_[1],'/\','\/'),strip($_[1], '//');},
49							':strip-c' => sub{strip($_[1],'/\','\/');},
50							':strip-xml' => sub{strip($_[1],'');},
51							':strip-perl' => sub{strip($_[1]);},
52							);
53
54							$def_proc{':default'} = $def_proc{':raw'};
55							$rex_proc = join('\|', keys %def_proc);
56
57							#
58							# import:
59							# process arguments and tie caller's %DATA
60							#
61							sub import
62							{
63	16			16		30	my $package = shift;
64	16					16	my $regex = shift;
65	16	50				54	my $cback = @_ ? [@_] : undef;
66	16					31	my $caller = caller;
67
68	16	50	33			175	$regex = $def_parse{$regex} if($regex && $regex =~ /^$rex_parse$/);
69	16	50				36	$regex = $def_parse{':default'}unless $regex;
70
71							# NB: test for existence...
72	16	50				40	if(!exists $modules{$caller}){
73							# process all callbacks that are stringified
74	1			1		5	no strict 'refs';
	1					2
	1					329
75	16	50				29	if($cback){
76	16					32	foreach(@$cback){
77	18	50				34	if(!ref $_){
78	18	100				216	if($_ =~ /^$rex_proc$/){
79							# predefined alias
80	17					57	$_ = $def_proc{$_};
81							}
82							else{
83							# stringy code ref - relative or absolute
84	0					0	$_ = ($_ =~ /\:\:/go) ? \&{$_} :
	1					9
85	1	50				5	\&{$caller."\::".$_};
86							}
87							}
88							else{
89	0	0				0	Carp::croak("Not a CODE reference")
90							unless "CODE" eq ref($_);
91							}
92							}
93							}
94
95	16					21	*{"$caller\::DATA"} = {};
	16					66
96	16					17	tie %{"$caller\::DATA"}, $package, $caller;
	16					66
97
98							# store private attributes till lazy-loading DATA
99	16					21	$handles{$caller} = \*{$caller."::DATA"};
	16					37
100	16					25	$modules{$caller} = undef;
101	16	50				37	$regexen{$caller} = (ref $regex) ? $regex : qr($regex);
102	16					667	$callbacks{$caller} = $cback;
103							}
104							}
105
106							#
107							# _read_data:
108							# Parse and process DATA handle once %DATA has been used.
109							# Cant do during import as Perl hasn't parsed that far by then
110							#
111							sub _read_data
112							{
113	16			16		20	my $self = shift;
114
115							# NB:test for definedness...
116	16	50				41	if(! defined $modules{$$self})
117							{
118	16					17	my (@data, $data, $tell, $rex, $code, $strip);
119	16					25	$rex = delete $regexen{$$self};
120	16					24	$code = delete $callbacks{$$self};
121	16					21	$data = delete $handles{$$self};
122
123							{
124							# slurp and parse...
125	1			1		4	no warnings;
	1					2
	1					1218
	16					22
126	16					71	local $/ = undef;
127	16					164	binmode($data);
128
129	16					21	$tell = tell($data);
130	16	50				61	Carp::croak("Error: $$self has no __DATA__ section")
131							if ($tell < 0);
132
133	16					249	my $d = <$data>;
134	16	100				275	@data = (ref($rex) eq "CODE") ? $rex->(\$d) :
135							split(/$rex/, $d);
136							}
137
138	16	50	0			43	$modules{$$self} = {} and return
139							unless @data;
140
141							# remove empty elements...depends on syntax used
142	16	100				62	shift @data if $data[0] =~ /^\s*$/o;
143	16	50				164	pop @data if $data[-1] =~ /^\s*$/o;
144	16	50				34	Carp::croak("Error: \%$$self\::DATA - bad key/value pairs")
145							if (@data % 2);
146
147							# invoke any callbacks...
148	16	50				30	if($code){
149	16					36	for(my $i=0; $i<@data; $i+=2){
150							$_ && $_->(\$data[$i], \$data[$i+1])
151	49		100			204	foreach @$code;
152							}
153							}
154
155							# coerce into hashref and cover our tracks
156	16					65	$modules{$$self} = {@data};
157	16					26	delete $modules{$$self}{''};
158	16					114	seek($data, $tell,0);
159							}
160							}
161
162							#
163							# Utility functions - see POD
164							#
165
166							#
167							# compress: trim and compact all whitspace to ' '
168							#
169							sub compress
170							{
171	9			9	1	10	my $txt = shift;
172	9					154	s#\s+# #gs, s#^\s+## , s#\s+$## for($$txt);
173							}
174
175							#
176							# block: preserve common indentation and surrounding newlines
177							#
178							sub block
179							{
180	18			18	1	20	my $txt = shift;
181	18					17	my $i = shift;
182	18	100				31	if($i){
183							# strip smallest common indentation
184	9					171	($i) = sort {length($a) <=> length($b)} $$txt =~ m#^$NL?(\s+)\S#mg;
	3					14
185	9					126	s#^$i##mg for($$txt);
186							}
187	18					179	s#^\s+$##mg for($$txt);
188							}
189
190							#
191							# trim: remove trailing and leading whitespace
192							#
193							sub trim
194							{
195	9			9	1	10	my $txt = shift;
196	9					104	s#^\s+##, s#\s+$## for($$txt);
197							}
198
199							#
200							# strip: remove (comment) sequences
201							#
202							sub strip
203							{
204	12			12	1	19	my $txt = shift;
205	12		50			22	my $beg = shift \|\| '\#';
206	12		66			30	my $end = shift \|\| $NL;
207	12					227	$$txt =~ s#$beg.*?$end##sgi;
208							}
209
210							#
211							# interpolate: simple template interpolation
212							#
213							sub interpolate
214							{
215	15			15	1	127	my $txt = shift;
216	15					17	my $vals = shift;
217	15		33			53	my $rex = shift \|\| $VARS;
218	15					219	$$txt =~ s#$rex#$vals->{$1}#sg;
219							}
220
221							#
222							# TIE HASH interface (read-only)
223							# not much to see here...
224							#
225
226							sub TIEHASH
227							{
228	16			16		26	my $class = shift;
229	16					19	my $caller = shift;
230	16					45	return bless \$caller, $class;
231							}
232
233							sub FETCH
234							{
235	105			105		607	my $self = shift;
236	105					133	my $key = shift;
237	105	100				228	$self->_read_data if(! defined $modules{$$self});
238	105					500	return $modules{$$self}{$key};
239							}
240
241							sub EXISTS
242							{
243	0			0		0	my $self = shift;
244	0					0	my $key = shift;
245	0	0				0	$self->_read_data if(! defined $modules{$$self});
246	0					0	return exists $modules{$$self}{$key};
247							}
248
249							sub FIRSTKEY
250							{
251	30			30		96	my $self = shift;
252	30	100				83	$self->_read_data if(! defined $modules{$$self});
253	30					27	my $a = keys %{$modules{$$self}};
	30					67
254	30					32	return each %{$modules{$$self}};
	30					169
255							}
256
257							sub NEXTKEY
258							{
259	105			105		115	my $self = shift;
260	105	50				187	$self->_read_data if(! defined $modules{$$self});
261	105					86	return each %{ $modules{$$self} }
	105					345
262							}
263
264							sub DESTROY
265							{
266	0			0			my $self = shift;
267	0						$modules{$$self} = undef;
268							}
269
270							sub STORE
271							{
272	0			0			my $self = shift;
273	0						my $k = shift;
274	0						my $v = shift;
275							#$self->_read_data if(! defined $modules{$$self});
276	0						Carp::croak("Attempt to store key ($k) in read-only hash \%DATA");
277							}
278
279							sub DELETE
280							{
281	0			0			my $self = shift;
282	0						my $k = shift;
283							#$self->_read_data if(! defined $modules{$$self});
284	0						Carp::croak("Attempt to delete key ($k) from read-only hash \%DATA");
285							}
286
287							sub CLEAR
288							{
289	0			0			my $self = shift;
290							#$self->_read_data if(! defined $modules{$$self});
291	0						Carp::croak("Attempt to clear read-only hash \%DATA");
292							}
293
294
295							1;
296
297
298							=pod
299
300							=head1 NAME
301
302							Text::Embed - Cleanly seperate unwieldy text from your source code
303
304							=head1 SYNOPSIS
305
306							use Text::Embed
307							use Text::Embed CODE\|REGEX\|SCALAR
308							use Text::Embed CODE\|REGEX\|SCALAR, LIST
309
310							=head1 ABSTRACT
311
312							Code often requires chunks of text to operate - chunks not large enough
313							to warrant extra file dependencies, but enough to make using quotes and
314							heredocs' ugly.
315
316							A typical example might be code generators. The text itself is code,
317							and as such is difficult to differentiate and maintain when it is
318							embedded inside more code. Similarly, CGI scripts often include
319							embedded HTML or SQL templates.
320
321							B provides the programmer with a flexible way to store
322							these portions of text in their namespace's __DATA__ handle - I
323							from the logic> - and access them through the package variable B<%DATA>.
324
325							=head1 DESCRIPTION
326
327							=head2 General Usage:
328
329							The general usage is expected to be suitable for a majority of cases:
330
331							use Text::Embed;
332
333							foreach(keys %DATA)
334							{
335							print "$_ = $DATA{$_}\n";
336							}
337
338							print $DATA{foo};
339
340
341
342							__DATA__
343
344							__foo__
345
346							yadda yadda yadda...
347
348							__bar__
349
350							ee-aye ee-aye oh
351
352							__baz__
353
354							woof woof
355
356							=head2 Custom Usage:
357
358							There are two stages to B's execution - corresponding to the
359							first and remaining arguments in its invocation.
360
361							use Text::Embed (
362							sub{ ... }, # parse key/values from DATA
363							sub{ ... }, # process pairs
364							... # process pairs
365							);
366
367							...
368
369							__DATA__
370
371							...
372
373							=head3 Stage 1: Parsing
374
375							By default, B uses similar syntax to the __DATA__ token to
376							seperate segments - a line consisting of two underscores surrounding an
377							identifier. Of course, a suitable syntax depends on the text being embedded.
378
379							A REGEX or CODE reference can be passed as the first argument - in order
380							to gain finer control of how __DATA__ is parsed:
381
382							=over 4
383
384							=item REGEX
385
386							use Text::Embed qr(<<<<<<<<(\w*?)>>>>>>>>);
387
388							A regular expression will be used in a call to C. Any
389							leading or trailing empty strings will be removed automatically.
390
391							=item CODE
392
393							use Text::Embed sub{$_ = shift; ...}
394							use Text::Embed &Some::Other::Function;
395
396							A subroutine will be passed a reference to the __DATA__ I.
397							It should return a LIST of key-value pairs.
398
399							=back
400
401							In the name of laziness, B provides a couple of
402							predefined formats:
403
404							=over 4
405
406							=item :default
407
408							Line-oriented __DATA__ like format:
409
410							__BAZ__
411							baz baz baz
412							__FOO__
413							foo foo foo
414							foo foo foo
415
416							=item :define
417
418							CPP-like format (%DATA is readonly - can be used to define constants):
419
420							#define BAZ baz baz baz
421							#define FOO foo foo foo
422							foo foo foo
423
424							=item :cdata
425
426							Line-agnostic CDATA-like format. Anything outside of tags is ignored.
427
428
429
430							foo foo foo
431							foo foo foo
432							]]>
433
434							=back
435
436							=head3 Stage 2: Processing
437
438							After parsing, each key-value pair can be further processed by an arbitrary
439							number of callbacks.
440
441							A common usage of this might be controlling how whitespace is represented
442							in each segment. B provides some likely defaults which operate
443							on the hash values only.
444
445							=over 4
446
447							=item :trim
448
449							Removes trailing or leading whitespace
450
451							=item :compress
452
453							Substitutes zero or more whitspace with a single
454
455							=item :block-indent
456
457							Removes trailing or leading blank lines, preserves all indentation
458
459							=item :block-noindent
460
461							Removes trailing or leading blank lines, preserves unique indentation
462
463							=item :raw
464
465							Leave untouched
466
467							=item :default
468
469							Same as B<:raw>
470
471							=back
472
473							If you need more control, CODE references or named subroutines can be
474							invoked as necessary. At this point it is safe to rename or modify keys.
475							Undefining a key removes the entry from B<%DATA>.
476
477							=head3 An Example Callback chain
478
479							For the sake of brevity, consider a module that has some embedded SQL.
480							We can implement a processing callback that will prepare each statement,
481							leaving B<%DATA> full of ready to execute DBI statement handlers:
482
483							package Whatever;
484
485							use DBI;
486							use Text::Embed(':default', ':trim', 'prepare_sql');
487
488							my $dbh;
489
490							sub prepare_sql
491							{
492							my ($k, $v) = @_;
493							if(!$dbh)
494							{
495							$dbh = DBI->connect(...);
496							}
497							$$v = $dbh->prepare($$v);
498							}
499
500							sub get_widget
501							{
502							my $id = shift;
503							my $sql = $DATA{select_widget};
504
505							$sql->execute($id);
506
507							if($sql->rows)
508							{
509							...
510							}
511							}
512
513
514							__DATA__
515
516							__select_widget__
517							SELECT * FROM widgets WHERE widget_id = ?;
518
519							__create_widget__
520							INSERT INTO widgets (widget_id,desc, price) VALUES (?,?,?);
521
522							..etc
523
524							Notice that each pair is I.
525
526							=head3 Utility Functions
527
528							Several utility functions are available to aid implementing custom
529							processing handlers. These are not exported into the callers namespace.
530
531							The first are equivalent to the default processing options:
532
533							=over 4
534
535							=item Text::Embed::trim SCALARREF
536
537							use Text::Embed(':default',':trim');
538							use Text::Embed(':default', sub {Text::Embed::trim($_[1]);} );
539
540							=item Text::Embed::compress SCALARREF
541
542							use Text::Embed(':default',':compress');
543							use Text::Embed(':default', sub {Text::Embed::compress($_[1]);} );
544
545							=item Text::Embed::block SCALARREF BOOLEAN
546
547							use Text::Embed(':default',':block-indent');
548							use Text::Embed(':default', sub {Text::Embed::block($_[1]);} );
549
550							If a true value is passed as the second argument, then shared
551							indentation is removed, ie B<:block-noindent>.
552
553							=back
554
555							=head3 Commenting
556
557							If comments would make your segments easier to manage, B
558							provides defaults handlers for stripping common comment syntax -
559							B<:strip-perl>, B<:strip-c>, B<:strip-cpp>, B<:strip-xml>.
560
561							=over 4
562
563							=item Text::Embed::strip SCALARREF [REGEX] [REGEX]
564
565							use Text::Embed(':default',':strip-c');
566							use Text::Embed(':default', sub {Text::Embed::strip($_[1], '/\', '\/');} );
567
568							Strips all sequences between second and third arguments. The default
569							arguments are '#' and '\n' respectively.
570
571							=back
572
573							=head3 Templating
574
575							Typically, embedded text may well be some kind of template. Text::Embed
576							provides rudimentary variable interpolation for simple templates.
577							The default variable syntax is of the form C<$(foo)>:
578
579							=over 4
580
581							=item Text::Embed::interpolate SCALARREF HASHREF [REGEX]
582
583							my $tmpl = "Hello $(name)! Your age is $(age)\n";
584							my %vars = (name => 'World', age => 4.5 * (10 ** 9));
585
586							Text::Embed::interpolate(\$tmpl, \%vars);
587							print $tmpl;
588
589							Any interpolation is done via a simple substitution. An additional
590							regex argument should accomodate this appropriately, by capturing
591							the necessary hashkey in C<$1>:
592
593							Text::Embed::interpolate(\$tmpl, \%vars, '<%(\S+)%>');
594
595							=back
596
597							=head1 BUGS & CAVEATS
598
599							The most likely bugs related to using this module should manifest
600							themselves as C error messages. There are two related
601							causes:
602
603							=over 4
604
605							=item COMMENTS
606
607							It is important to realise that B does I have its own
608							comment syntax or preprocessor. Any parser that works using C is
609							likely to fail if comments precede the first segment. I
610							exist in the body of a segment - not preceding it>.
611
612							=item CUSTOM PARSING
613
614							If you are defining your own REGEX parser, make sure you understand
615							how it works when used with C - particularly if your syntax
616							wraps your data. Consider using a subroutine for anything non-trivial.
617
618							=back
619
620							If you employ REGEX parsers, use seperators that are I
621							different - and well spaced - from your data, rather than relying on
622							complicated regular expressions to escape pathological cases.
623
624							Bug reports and suggestions are most welcome.
625
626							=head1 AUTHOR
627
628							Copyright (C) 2005 Chris McEwan - All rights reserved.
629
630							Chris McEwan
631
632							=head1 LICENSE
633
634							This program is free software; you can redistribute it and/or modify it
635							under the same terms as Perl itself.
636
637							=cut
638