File Coverage

blib/lib/Bricklayer/Templater/Parser.pm

Criterion	Covered	Total	%
statement	57	83	68.6
branch	15	28	53.5
condition	4	8	50.0
subroutine	5	5	100.0
pod	3	3	100.0
total	84	127	66.1

line	stmt	bran	cond	sub	pod	time	code
1							#-------------------------------------------------------------------------------
2							#
3							# File: parser.pm
4							# Version: 0.9.1
5							# Author: Jason Wall and Jeremy Wall
6							# Definition: The parser package has a simple and single job to do, and that
7							# is to take a block of text and turn it into an ordered array of
8							# tokens containing either a text block or a tag/tag block.
9							# Each token will contain both the block and a flag noting what
10							# kind of token it is (text or tag). The package then returns the
11							# array.
12							#
13							# Revised by Jeremy Wall to fix errors when parsing undefined Tag
14							# Start definitions.
15							#-------------------------------------------------------------------------------
16							package Bricklayer::Templater::Parser;
17
18							=head1 NAME
19
20							Bricklayer::Templater::Parser - The parsing engine for Bricklayer::Templater
21
22							=head1 Methods
23
24							=cut
25
26	2			2		4250	use strict;
	2					5
	2					69
27	2			2		10	use Carp;
	2					2
	2					3943
28
29							# Global Variables
30							my $DefaultTagID = 'BK';
31
32							# Package Methods
33							sub parse_text {
34							# Function wide variables
35	7			7	1	2201	my $textblock = shift; #this variable contains the block of text to be parsed.
36	7	100				41	my $tagID = shift or croak('Must pass in tagid'); # this variable contains the ID of the tag it is optional
37	5	100				21	my $startbracket = shift or croak('Must pass in startbracket');
38	4	50				9	my $endbracket = shift or croak('Must pass in endbracket');
39	4					11	my $TagStartText = $startbracket.$tagID; # Start tag prefix (it gets passed in)
40	4					8	my $TagEndText = $startbracket.'/'.$tagID; # End tag prefix (it also gets passed in)
41							#carp("startbracket: $startbracket");
42							#carp("endbracket: $endbracket");
43							#carp("TagStartText: $TagStartText");
44							#carp("TagEndText: $TagEndText");
45	4					6	my @tokens = (); #this variable will contain the tokenized text.
46
47							# loop through the string tokenizing as you go. Since you are actually removing
48							# the pieces of text as you go, when you are finished, the string should be empty.
49	4		66			25	while (defined $textblock and $textblock ne "") {
50							# it becomes necessary to define these here, as each iteration through
51							# the loop will redefine them.
52
53	8					11	my %token;
54							my $tagname;
55	0					0	my $block;
56	0					0	my %attributes;
57
58	0					0	my $endpos; # this variable is used variously throughout the process.
59
60							# check to see if the first part of the string is not a tag, ie doesn't start with the Start Tag Prefix..
61	8	50				36	if (substr($textblock, 0, length($TagStartText)) ne $TagStartText) {
		50
62							#carp("In a text block");
63							# find the begining of the tag.
64	0					0	$endpos = index($textblock, $TagStartText);
65							# populate the text block
66	0	0				0	if ($endpos == -1) {
67							#carp("No more text to process");
68							# if there are no more template tags, put the remaining text into the block
69	0					0	$block = substr($textblock, 0);
70							#remove the text block from $textblock.
71	0					0	$textblock = "";
72							} else {
73							# if there are tags remaining, put the text that is in front of tag into the block
74	0					0	$block = substr($textblock, 0, $endpos);
75							#remove the text block from $textblock.
76	0					0	$textblock = substr($textblock, $endpos);
77							}
78
79							# create and populate the token
80	0					0	%token = (
81							type => "text",
82							tagname => "text_block",
83							block => $block,
84							tagid => $tagID
85							);
86
87							#put the token at the bottom of the stack.
88	0					0	push @tokens, \%token;
89							}
90							# check to see if the first part of the string is a tag
91							elsif (substr($textblock, 0, length($TagStartText)) eq $TagStartText) {
92	8					10	my $tag;
93
94							# get the tag from the string.
95	8					18	$tag = substr($textblock,0,index($textblock, $endbracket) +1);
96
97							# This is where we determine if the tag is a block or a single tag.
98							# We do this by looking for the forward slash, if it doesn't find one
99							# then it means the tag is a block.
100	8	50				22	if (index($tag, "/$endbracket") == -1) {
101
102							# Get the tag name.
103	8					17	$tagname = substr($tag, length($TagStartText), index($tag, $endbracket)-length($TagStartText));
104	8	100				17	if (index($tagname, ' ') != -1) {
105	3					7	$tagname = substr($tagname, 0, index($tagname, ' '));
106							}
107
108							# Get the tag block.
109							# But first remove the opening tag from $textblock.
110	8					15	$textblock = substr($textblock, index($textblock, $endbracket)+1);
111
112
113							# before going on, you must check for nested tags identical to the current tag.
114	8					11	my $open_tag = $TagStartText.$tagname;
115	8					11	my $close_tag = $TagEndText.$tagname;
116	8					26	my $open_len = length($open_tag);
117	8					9	my $close_len = length($close_tag);
118	8					9	my $ignore = 0;
119	8					711	my $found_end = 0;
120							#is there an open tag in our text block still?
121	8					15	my $idx_open_tag = index($textblock,$open_tag);
122							#is there a close tag in our text block?
123	8					12	my $idx_close_tag = index($textblock,$close_tag);
124
125	8					26	while (not $found_end) {
126
127	8	50	33			24	if ($idx_open_tag == -1 or $idx_open_tag > $idx_close_tag) {
128							# there is not open tag or there is an open tag before the close tag
129							# This is a possible end tag, check to see if it is what
130							# we are looking for
131	8	50				13	if ($ignore > 0 ) {
132							# this is not the end tag you are looking for...
133	0					0	$ignore--;
134							# find the index of the next close tag after this one;
135	0					0	$idx_close_tag = index($textblock, $close_tag, $idx_close_tag+$close_len)
136
137							} else {
138							# This is the end tag you are looking for.
139	8					19	$found_end = 1;
140							}
141							} else {# there is another open tag before our first close tag
142							# this is a nested open tag, it needs to be checked
143							# to see if it is self closing.
144	0					0	my $nested_tag =
145							substr($textblock,$idx_open_tag,
146							index($textblock, $endbracket, $idx_open_tag)-$idx_open_tag+1);
147	0	0				0	if (index($nested_tag, "/$endbracket") == -1) {
148							# this is a nested block tag
149	0					0	$ignore++;
150							}
151							# find the next open tag.
152	0					0	$idx_open_tag = index($textblock, $open_tag, $idx_open_tag+$open_len)
153							}
154							}
155							# grab our block from the text block start to the closing tags index
156	8					13	$block = substr($textblock, 0, $idx_close_tag);
157
158							# and remove the block of text from $textblock.
159	8					11	$textblock = substr($textblock, $idx_close_tag);
160							# remove the closing tag from $textblock.
161	8					15	$textblock = substr($textblock, index($textblock, $endbracket)+1);
162
163							# get the attributes.
164	8					14	$tag = substr($tag,length($TagStartText));
165	8					14	$tag = substr($tag, 0, index($tag, $endbracket));
166
167	8					17	%attributes = parse_attributes($tag);
168
169							# populate the token
170	8					45	%token = (
171							type => "container",
172							tagname => $tagname,
173							block => $block,
174							attributes => \%attributes,
175							tagid => $tagID
176							);
177
178
179							} else {
180							# this parses a single tag out.
181
182							# Get the tag name.
183	0					0	$tagname = substr($tag, length($TagStartText), index($tag, $endbracket)-(length($TagStartText)+1));
184	0	0				0	if (index($tagname, ' ') != -1) {
185	0					0	$tagname = substr($tagname, 0, index($tagname, ' '));
186							}
187
188							# remove the tag from $textblock.
189	0					0	$textblock = substr($textblock, index($textblock, $endbracket)+1);
190
191							# get the attributes.
192	0					0	$tag = substr($tag,length($TagStartText));
193	0					0	$tag = substr($tag, 0, index($tag, $endbracket));
194	0					0	%attributes = parse_attributes($tag);
195
196							# populate the token
197	0					0	%token = (
198							type => "single",
199							tagname => $tagname,
200							block => undef,
201							attributes => \%attributes,
202							tagid => $tagID
203							);
204							}
205
206							# put the token on the bottom of the stack.
207	8					43	push @tokens, \%token;
208							}
209							}
210
211
212	4					24	return @tokens;
213							}
214
215
216							# this function knows how to parse the attributes out of a textblock
217							# and returns them in a hash.
218							sub parse_attributes {
219
220							#variable declarations
221	8			8	1	12	my $atag = shift;
222	8					9	my %attributes;
223							my $key;
224	0					0	my $value;
225
226							#remove the tagname and closing carat.
227	8	100				33	return undef unless $atag =~ / /;
228	3					8	$atag = substr($atag, index($atag, ' ')+1);
229
230							# while (mytrim($atag) ne "") {
231							# $atag = mytrim($atag);
232							# $key = mytrim(substr($atag, 0, index($atag, '=')));
233							# $atag = mytrim(substr($atag, index($atag, '=')+1));
234							# my $quote = substr($atag,0,1);
235							# $atag = substr($atag,1);
236							# $value = substr($atag, 0, index($atag, $quote));
237							# $atag = substr($atag, index($atag, $quote) + 1 );
238							#
239							# $attributes{$key} = $value;
240							# }
241	3					18	my @attribs = split(/\s+/, $atag);
242	3					6	foreach my $attrib (@attribs) {
243	3					15	my ($key, $value) = $attrib =~ /(.+)="(.*)"/;
244	3	50				9	$key = $attrib unless $key;
245	3		50			18	$attributes{lc($key)} = $value \|\| 1;
246							# die "$attrib: $key = $value";
247							}
248							#return the hash.
249	3					500	return %attributes;
250							}
251
252
253							sub mytrim {
254	2			2	1	1455	my $var = shift;
255	2					10	$var =~ s/^\s+\|\s+$//g;
256	2					7	return $var;
257							}
258
259							return 1;
260
261
262							=head1 NAME
263
264							Bricklayer::Templater::Parser - A generic parsing module.
265
266							=head1 SYNOPSIS
267
268							use Bricklayer::Templater::Parser;
269
270							my $template_text;
271							my $start_tag_prefix;
272							my $end_tag_prefix;
273
274							my @tokens = Template::Parser::parse_text($template_text,$start_tag_prefix,$end_tag_prefix);
275
276							=head1 REQUIRES
277
278							Perl 5.8
279							Exporter
280
281							=head1 DESCRIPTION
282
283							The parser package has a simple and single job to do, and that is to take a block
284							of text and turn it into an ordered array of tokens containing either a text block
285							or a single tag or tag block. Each token will contain both the block and a flag
286							noting what kind of token it is (text, tag_block or tag). The package then returns
287							the array.
288
289
290							=head1 METHODS
291
292							=head2 parse_text()
293
294							The parse_text() function takes the template text, a start tag prefix, and an end
295							tag prefix, parses the appropriate tags and returns a token tree in the form of an
296							array of hashes, each hash with a variable number of elements, depending on its type.
297
298							For Example:
299
300							%text_token = (
301							type => "text",
302							block => $block,
303							);
304
305							%block_tag_token = (
306							type => "block_tag",
307							tagname => $tagname,
308							block => $block,
309							attributes => \%attributes,
310							);
311
312							%tag_token = (
313							type => "tag",
314							tagname => $tagname,
315							attributes => \%attributes,
316							);
317
318							The attributes value is a reference to an attributes hash in the form of:
319
320							%attributes = (
321							"attribute_name" => "attribute_value",
322							);
323
324							Further Notes: The token tree returned by parse_text is not recursive, thus the
325							tags inside a tag_block will not be processed, and you will need to call the
326							parse_text() function again when dealing with those tags.
327
328							To build a complete tree, call the parse_text() function iteratively like so:
329
330							sub parse {
331							my $template_text = shift;
332							my $start_tag_prefix = shift;
333							my $end_tag_prefix = shift;
334							my @token_tree;
335
336							my @tokens = Parser::parse_text($template_text,$start_tag_prefix,$end_tag_prefix);
337
338							foreach my $token (@tokens) {
339							if ($token->{type} eq 'block_tag') {
340							my @sub_tokens;
341							@sub_tokens = parse($token->{block},$start_tag_prefix,$end_tag_prefix);
342							$token->{block} = \@sub_tokens;
343							}
344							push @token_tree, $token;
345							}
346							return @token_tree;
347							}
348
349							=head2 parse_attributes
350
351							parses the tag attributes
352
353							=head2 mytrim
354
355							trims whitespace for us
356
357							=head1 AUTHOR
358
359							(c) 2004 Jason Wall, , www.walljm.com
360							(c) 2004 Jeremy Wall, , jeremy.marzhillstudios.com
361
362							=cut