File Coverage

blib/lib/Bricklayer/Templater/Parser.pm
Criterion Covered Total %
statement 57 83 68.6
branch 15 28 53.5
condition 4 8 50.0
subroutine 5 5 100.0
pod 3 3 100.0
total 84 127 66.1


line stmt bran cond sub pod time code
1             #-------------------------------------------------------------------------------
2             #
3             # File: parser.pm
4             # Version: 0.9.1
5             # Author: Jason Wall and Jeremy Wall
6             # Definition: The parser package has a simple and single job to do, and that
7             # is to take a block of text and turn it into an ordered array of
8             # tokens containing either a text block or a tag/tag block.
9             # Each token will contain both the block and a flag noting what
10             # kind of token it is (text or tag). The package then returns the
11             # array.
12             #
13             # Revised by Jeremy Wall to fix errors when parsing undefined Tag
14             # Start definitions.
15             #-------------------------------------------------------------------------------
16             package Bricklayer::Templater::Parser;
17              
18             =head1 NAME
19              
20             Bricklayer::Templater::Parser - The parsing engine for Bricklayer::Templater
21              
22             =head1 Methods
23              
24             =cut
25              
26 2     2   4250 use strict;
  2         5  
  2         69  
27 2     2   10 use Carp;
  2         2  
  2         3943  
28              
29             # Global Variables
30             my $DefaultTagID = 'BK';
31              
32             # Package Methods
33             sub parse_text {
34             # Function wide variables
35 7     7 1 2201 my $textblock = shift; #this variable contains the block of text to be parsed.
36 7 100       41 my $tagID = shift or croak('Must pass in tagid'); # this variable contains the ID of the tag it is optional
37 5 100       21 my $startbracket = shift or croak('Must pass in startbracket');
38 4 50       9 my $endbracket = shift or croak('Must pass in endbracket');
39 4         11 my $TagStartText = $startbracket.$tagID; # Start tag prefix (it gets passed in)
40 4         8 my $TagEndText = $startbracket.'/'.$tagID; # End tag prefix (it also gets passed in)
41             #carp("startbracket: $startbracket");
42             #carp("endbracket: $endbracket");
43             #carp("TagStartText: $TagStartText");
44             #carp("TagEndText: $TagEndText");
45 4         6 my @tokens = (); #this variable will contain the tokenized text.
46            
47             # loop through the string tokenizing as you go. Since you are actually removing
48             # the pieces of text as you go, when you are finished, the string should be empty.
49 4   66     25 while (defined $textblock and $textblock ne "") {
50             # it becomes necessary to define these here, as each iteration through
51             # the loop will redefine them.
52            
53 8         11 my %token;
54             my $tagname;
55 0         0 my $block;
56 0         0 my %attributes;
57              
58 0         0 my $endpos; # this variable is used variously throughout the process.
59              
60             # check to see if the first part of the string is not a tag, ie doesn't start with the Start Tag Prefix..
61 8 50       36 if (substr($textblock, 0, length($TagStartText)) ne $TagStartText) {
    50          
62             #carp("In a text block");
63             # find the begining of the tag.
64 0         0 $endpos = index($textblock, $TagStartText);
65             # populate the text block
66 0 0       0 if ($endpos == -1) {
67             #carp("No more text to process");
68             # if there are no more template tags, put the remaining text into the block
69 0         0 $block = substr($textblock, 0);
70             #remove the text block from $textblock.
71 0         0 $textblock = "";
72             } else {
73             # if there are tags remaining, put the text that is in front of tag into the block
74 0         0 $block = substr($textblock, 0, $endpos);
75             #remove the text block from $textblock.
76 0         0 $textblock = substr($textblock, $endpos);
77             }
78              
79             # create and populate the token
80 0         0 %token = (
81             type => "text",
82             tagname => "text_block",
83             block => $block,
84             tagid => $tagID
85             );
86              
87             #put the token at the bottom of the stack.
88 0         0 push @tokens, \%token;
89             }
90             # check to see if the first part of the string is a tag
91             elsif (substr($textblock, 0, length($TagStartText)) eq $TagStartText) {
92 8         10 my $tag;
93            
94             # get the tag from the string.
95 8         18 $tag = substr($textblock,0,index($textblock, $endbracket) +1);
96            
97             # This is where we determine if the tag is a block or a single tag.
98             # We do this by looking for the forward slash, if it doesn't find one
99             # then it means the tag is a block.
100 8 50       22 if (index($tag, "/$endbracket") == -1) {
101            
102             # Get the tag name.
103 8         17 $tagname = substr($tag, length($TagStartText), index($tag, $endbracket)-length($TagStartText));
104 8 100       17 if (index($tagname, ' ') != -1) {
105 3         7 $tagname = substr($tagname, 0, index($tagname, ' '));
106             }
107            
108             # Get the tag block.
109             # But first remove the opening tag from $textblock.
110 8         15 $textblock = substr($textblock, index($textblock, $endbracket)+1);
111            
112            
113             # before going on, you must check for nested tags identical to the current tag.
114 8         11 my $open_tag = $TagStartText.$tagname;
115 8         11 my $close_tag = $TagEndText.$tagname;
116 8         26 my $open_len = length($open_tag);
117 8         9 my $close_len = length($close_tag);
118 8         9 my $ignore = 0;
119 8         711 my $found_end = 0;
120             #is there an open tag in our text block still?
121 8         15 my $idx_open_tag = index($textblock,$open_tag);
122             #is there a close tag in our text block?
123 8         12 my $idx_close_tag = index($textblock,$close_tag);
124            
125 8         26 while (not $found_end) {
126            
127 8 50 33     24 if ($idx_open_tag == -1 or $idx_open_tag > $idx_close_tag) {
128             # there is not open tag or there is an open tag before the close tag
129             # This is a possible end tag, check to see if it is what
130             # we are looking for
131 8 50       13 if ($ignore > 0 ) {
132             # this is not the end tag you are looking for...
133 0         0 $ignore--;
134             # find the index of the next close tag after this one;
135 0         0 $idx_close_tag = index($textblock, $close_tag, $idx_close_tag+$close_len)
136            
137             } else {
138             # This is the end tag you are looking for.
139 8         19 $found_end = 1;
140             }
141             } else {# there is another open tag before our first close tag
142             # this is a nested open tag, it needs to be checked
143             # to see if it is self closing.
144 0         0 my $nested_tag =
145             substr($textblock,$idx_open_tag,
146             index($textblock, $endbracket, $idx_open_tag)-$idx_open_tag+1);
147 0 0       0 if (index($nested_tag, "/$endbracket") == -1) {
148             # this is a nested block tag
149 0         0 $ignore++;
150             }
151             # find the next open tag.
152 0         0 $idx_open_tag = index($textblock, $open_tag, $idx_open_tag+$open_len)
153             }
154             }
155             # grab our block from the text block start to the closing tags index
156 8         13 $block = substr($textblock, 0, $idx_close_tag);
157              
158             # and remove the block of text from $textblock.
159 8         11 $textblock = substr($textblock, $idx_close_tag);
160             # remove the closing tag from $textblock.
161 8         15 $textblock = substr($textblock, index($textblock, $endbracket)+1);
162            
163             # get the attributes.
164 8         14 $tag = substr($tag,length($TagStartText));
165 8         14 $tag = substr($tag, 0, index($tag, $endbracket));
166              
167 8         17 %attributes = parse_attributes($tag);
168            
169             # populate the token
170 8         45 %token = (
171             type => "container",
172             tagname => $tagname,
173             block => $block,
174             attributes => \%attributes,
175             tagid => $tagID
176             );
177            
178            
179             } else {
180             # this parses a single tag out.
181              
182             # Get the tag name.
183 0         0 $tagname = substr($tag, length($TagStartText), index($tag, $endbracket)-(length($TagStartText)+1));
184 0 0       0 if (index($tagname, ' ') != -1) {
185 0         0 $tagname = substr($tagname, 0, index($tagname, ' '));
186             }
187            
188             # remove the tag from $textblock.
189 0         0 $textblock = substr($textblock, index($textblock, $endbracket)+1);
190            
191             # get the attributes.
192 0         0 $tag = substr($tag,length($TagStartText));
193 0         0 $tag = substr($tag, 0, index($tag, $endbracket));
194 0         0 %attributes = parse_attributes($tag);
195            
196             # populate the token
197 0         0 %token = (
198             type => "single",
199             tagname => $tagname,
200             block => undef,
201             attributes => \%attributes,
202             tagid => $tagID
203             );
204             }
205            
206             # put the token on the bottom of the stack.
207 8         43 push @tokens, \%token;
208             }
209             }
210            
211            
212 4         24 return @tokens;
213             }
214              
215              
216             # this function knows how to parse the attributes out of a textblock
217             # and returns them in a hash.
218             sub parse_attributes {
219            
220             #variable declarations
221 8     8 1 12 my $atag = shift;
222 8         9 my %attributes;
223             my $key;
224 0         0 my $value;
225              
226             #remove the tagname and closing carat.
227 8 100       33 return undef unless $atag =~ / /;
228 3         8 $atag = substr($atag, index($atag, ' ')+1);
229              
230             # while (mytrim($atag) ne "") {
231             # $atag = mytrim($atag);
232             # $key = mytrim(substr($atag, 0, index($atag, '=')));
233             # $atag = mytrim(substr($atag, index($atag, '=')+1));
234             # my $quote = substr($atag,0,1);
235             # $atag = substr($atag,1);
236             # $value = substr($atag, 0, index($atag, $quote));
237             # $atag = substr($atag, index($atag, $quote) + 1 );
238             #
239             # $attributes{$key} = $value;
240             # }
241 3         18 my @attribs = split(/\s+/, $atag);
242 3         6 foreach my $attrib (@attribs) {
243 3         15 my ($key, $value) = $attrib =~ /(.+)="(.*)"/;
244 3 50       9 $key = $attrib unless $key;
245 3   50     18 $attributes{lc($key)} = $value || 1;
246             # die "$attrib: $key = $value";
247             }
248             #return the hash.
249 3         500 return %attributes;
250             }
251              
252              
253             sub mytrim {
254 2     2 1 1455 my $var = shift;
255 2         10 $var =~ s/^\s+|\s+$//g;
256 2         7 return $var;
257             }
258              
259             return 1;
260              
261              
262             =head1 NAME
263              
264             Bricklayer::Templater::Parser - A generic parsing module.
265              
266             =head1 SYNOPSIS
267              
268             use Bricklayer::Templater::Parser;
269              
270             my $template_text;
271             my $start_tag_prefix;
272             my $end_tag_prefix;
273              
274             my @tokens = Template::Parser::parse_text($template_text,$start_tag_prefix,$end_tag_prefix);
275              
276             =head1 REQUIRES
277              
278             Perl 5.8
279             Exporter
280              
281             =head1 DESCRIPTION
282              
283             The parser package has a simple and single job to do, and that is to take a block
284             of text and turn it into an ordered array of tokens containing either a text block
285             or a single tag or tag block. Each token will contain both the block and a flag
286             noting what kind of token it is (text, tag_block or tag). The package then returns
287             the array.
288              
289              
290             =head1 METHODS
291              
292             =head2 parse_text()
293              
294             The parse_text() function takes the template text, a start tag prefix, and an end
295             tag prefix, parses the appropriate tags and returns a token tree in the form of an
296             array of hashes, each hash with a variable number of elements, depending on its type.
297              
298             For Example:
299            
300             %text_token = (
301             type => "text",
302             block => $block,
303             );
304            
305             %block_tag_token = (
306             type => "block_tag",
307             tagname => $tagname,
308             block => $block,
309             attributes => \%attributes,
310             );
311            
312             %tag_token = (
313             type => "tag",
314             tagname => $tagname,
315             attributes => \%attributes,
316             );
317              
318             The attributes value is a reference to an attributes hash in the form of:
319              
320             %attributes = (
321             "attribute_name" => "attribute_value",
322             );
323            
324             Further Notes: The token tree returned by parse_text is not recursive, thus the
325             tags inside a tag_block will not be processed, and you will need to call the
326             parse_text() function again when dealing with those tags.
327            
328             To build a complete tree, call the parse_text() function iteratively like so:
329            
330             sub parse {
331             my $template_text = shift;
332             my $start_tag_prefix = shift;
333             my $end_tag_prefix = shift;
334             my @token_tree;
335            
336             my @tokens = Parser::parse_text($template_text,$start_tag_prefix,$end_tag_prefix);
337            
338             foreach my $token (@tokens) {
339             if ($token->{type} eq 'block_tag') {
340             my @sub_tokens;
341             @sub_tokens = parse($token->{block},$start_tag_prefix,$end_tag_prefix);
342             $token->{block} = \@sub_tokens;
343             }
344             push @token_tree, $token;
345             }
346             return @token_tree;
347             }
348              
349             =head2 parse_attributes
350              
351             parses the tag attributes
352              
353             =head2 mytrim
354              
355             trims whitespace for us
356              
357             =head1 AUTHOR
358              
359             (c) 2004 Jason Wall, , www.walljm.com
360             (c) 2004 Jeremy Wall, , jeremy.marzhillstudios.com
361              
362             =cut