File Coverage

blib/lib/Text/Highlight.pm
Criterion Covered Total %
statement 154 305 50.4
branch 61 198 30.8
condition 23 73 31.5
subroutine 12 16 75.0
pod 0 5 0.0
total 250 597 41.8


line stmt bran cond sub pod time code
1             package Text::Highlight;
2              
3 2     2   37873 use strict;
  2         7  
  2         103  
4 2     2   13 use Carp qw/cluck croak/;
  2         5  
  2         269  
5              
6             #accessable and editable if someone really wants them
7 2         19544 use vars qw($VERSION $VB_FORMAT $VB_WRAPPER $VB_ESCAPE
8             $TGML_FORMAT $TGML_WRAPPER $TGML_ESCAPE $RAW_COLORS
9             $DEF_FORMAT $DEF_ESCAPE $DEF_WRAPPER $DEF_COLORS
10 2     2   13 $ANSI_FORMAT $ANSI_WRAPPER $ANSI_COLORS);
  2         8  
11             $VERSION = 0.04;
12              
13             #some wrapper settings for typical message boards (ie, the ones I frequent :)
14             #Anyone with an idea for IPB or phpBB settings, let me know. Last time I checked IPB,
15             # the only way to set mono-spaced font is to use [code] tags, which destroy any markup within.
16             #A PHP port is planned once the issues with this get ironed out.
17             $VB_FORMAT = '[color=%s]%s[/color]';
18             $VB_WRAPPER = '[code]%s[/code]';
19             # [ -> [
20             $VB_ESCAPE = sub { $_[0] =~ s/\[/[/g; $_[0] };
21              
22             $TGML_FORMAT = '[color %s]%s[/color]';
23             $TGML_WRAPPER = "[code]\n%s\n[/code]";
24             # [ -> [[]
25             $TGML_ESCAPE = sub { $_[0] =~ s/\[/[[]/g; $_[0] };
26              
27             $RAW_COLORS = { comment => '#006600',
28             string => '#808080',
29             number => '#FF0000',
30             key1 => '#0000FF',
31             key2 => '#FF0000',
32             key3 => '#FF8000',
33             key4 => '#00B0B0',
34             key5 => '#FF00FF',
35             key6 => '#D0D000',
36             key7 => '#D0D000',
37             key8 => '#D0D000',
38             };
39              
40             #default values in new()
41             $DEF_FORMAT = '%s';
42             $DEF_ESCAPE = \&_simple_html_escape;
43             $DEF_WRAPPER = '
%s
';
44             $DEF_COLORS = { comment => 'comment',
45             string => 'string',
46             number => 'number',
47             key1 => 'key1',
48             key2 => 'key2',
49             key3 => 'key3',
50             key4 => 'key4',
51             key5 => 'key5',
52             key6 => 'key6',
53             key7 => 'key7',
54             key8 => 'key8',
55             };
56              
57             #set limit maximum of keyword groups (must change default colors hash, too)
58             #not a package var, must be changed here (better know what you're doing)
59             my $KEYMAX = 8;
60              
61             sub new
62             {
63 1     1 0 14 my $class = shift;
64            
65 1         3 my $self = {};
66            
67             #set defaults (as copies of $DEF_*)
68 1         5 $self->{_output} = '';
69 1         4 $self->{_format} = $DEF_FORMAT;
70 1         3 $self->{_escape} = $DEF_ESCAPE;
71 1         4 $self->{_wrapper} = $DEF_WRAPPER;
72 1         7 %{$self->{_colors}} = %$DEF_COLORS;
  1         14  
73 1         4 $self->{_grammars} = {};
74            
75 1         3 bless $self, $class;
76            
77             #set any parameters passed to new
78 1         5 $self->configure(@_);
79            
80 1         4 return $self;
81             }
82              
83             sub configure
84             {
85 1     1 0 3 my $self = shift;
86            
87             #my extensive parameter checking :(
88 1 50       9 my %param = @_ if(@_ % 2 == 0);
89            
90 1 50       8 return unless %param;
91            
92             #do we want vBulletin-friendly output?
93 1 50 33     18 if(exists $param{vb} && $param{vb})
94             {
95             #set generalized defaults for posting in a forum
96 0         0 $self->{_format} = $VB_FORMAT;
97 0         0 $self->{_wrapper} = $VB_WRAPPER;
98 0         0 %{$self->{_colors}} = %$RAW_COLORS;
  0         0  
99 0         0 $self->{_escape} = $VB_ESCAPE;
100             }
101              
102             #do we want Tek-Tips-friendly output?
103 1 50 33     6 if(exists $param{tgml} && $param{tgml})
104             {
105             #set generalized defaults for posting in a forum
106 0         0 $self->{_format} = $TGML_FORMAT;
107 0         0 $self->{_wrapper} = $TGML_WRAPPER;
108 0         0 %{$self->{_colors}} = %$RAW_COLORS;
  0         0  
109 0         0 $self->{_escape} = $TGML_ESCAPE;
110             }
111            
112             #do we want ANSI-terminal-friendly output?
113 1 50 33     5 if(exists $param{ansi} && $param{ansi})
114             {
115             #dumped in an eval block to only require the module for those who use it
116 0         0 eval q[
117             use Term::ANSIColor;
118             $ANSI_FORMAT = '%s%s'.color('reset');
119             $ANSI_WRAPPER = '%s';
120             $ANSI_COLORS = { comment => color('bold green'),
121             string => color('bold yellow'),
122             number => color('bold red'),
123             key1 => color('bold cyan'),
124             key2 => color('bold red'),
125             key3 => color('bold magenta'),
126             key4 => color('bold blue'),
127             key5 => color('bold blue'),
128             key6 => color('bold blue'),
129             key7 => color('bold blue'),
130             key8 => color('bold blue'),
131             };
132             ];
133 0 0       0 if($@)
134             {
135 0         0 cluck $@;
136             }
137             else
138             {
139             #set ANSI color escape sequences
140 0         0 $self->{_format} = $ANSI_FORMAT;
141 0         0 $self->{_wrapper} = $ANSI_WRAPPER;
142 0         0 %{$self->{_colors}} = %$ANSI_COLORS;
  0         0  
143            
144             #set the escape to undef, assuming it's not already set
145 0 0       0 $param{escape} = undef unless(exists $param{escape});
146             }
147             }
148            
149             #if array ref, set to all readable files in list, else just the one passed
150 1 50       5 if(exists $param{wordfile})
151             {
152 0 0       0 if(ref $param{wordfile} eq 'ARRAY')
153             {
154 0         0 my $tmpref = [];
155 0         0 for(@{$param{wordfile}})
  0         0  
156             {
157 0 0       0 -r && push @$tmpref, $_;
158             }
159 0 0       0 $self->{_wordfile} = $tmpref if(@$tmpref > 0);
160             } else {
161 0 0       0 -r $param{wordfile} && push @{$self->{_wordfile}}, $param{wordfile};
  0         0  
162             }
163             }
164            
165             #should have two "%s" strings in it, for type and code
166 1 50       6 if(exists $param{format})
167             {
168 0 0       0 if($param{format} =~ /(\%s.*){2}/)
169             {
170 0         0 $self->{_format} = $param{format};
171             } else {
172 0         0 cluck "Param format invalid: does not have two %s strings.\n";
173             }
174             }
175            
176             #need one %s for the code
177 1 50       5 if(exists $param{wrapper})
178             {
179             #undef -> no wrapper
180 1 50       9 unless(defined($param{wrapper}))
    50          
181             {
182 0         0 $self->{_wrapper} = '%s';
183             }
184            
185             #if not undef, needs to have a %s for the code
186             elsif($param{wrapper} =~ /\%s/)
187             {
188 1         8 $self->{_wrapper} = $param{wrapper};
189             }
190            
191             else {
192 0         0 cluck "Param wrapper invalid: does not have %s string.\n";
193             }
194             }
195            
196             #sub is the same prototype as CGI.pm's escapeHTML()
197             #and HTML::Entity's encode_entities()
198             #$escaped_string = escapeHTML("unescaped string");
199 1 50       5 if(exists $param{escape})
200             {
201             #undef -> no escaping, set dummy sub to return input
202 0 0       0 unless(defined($param{escape}))
    0          
    0          
203             {
204 0     0   0 $self->{_escape} = sub { return $_[0] };
  0         0  
205             }
206            
207             #if not undef, check for code ref
208             elsif(ref $param{escape} eq 'CODE')
209             {
210 0         0 $self->{_escape} = $param{escape};
211             }
212            
213             #and last, check for 'default' string
214             elsif($param{escape} =~ /^default$/i)
215             {
216 0         0 $self->{_escape} = $DEF_ESCAPE;
217             }
218            
219             else {
220 0         0 cluck "Param escape invalid: is not coderef, undef, or 'default' string.\n";
221             }
222             }
223            
224             #must pass a hashref
225 1 50       12 if(exists $param{colors})
226             {
227 0 0       0 if(ref $param{colors} eq 'HASH')
228             {
229             #loop over only predefined classes (defaults from new)
230 0         0 for(keys %{$self->{_colors}})
  0         0  
231             {
232 0 0       0 $self->{_colors}{$_} = $param{colors}{$_} if(exists $param{colors}{$_});
233             }
234             } else {
235 0         0 cluck "Param colors invalid: is not a hashref.\n";
236             }
237             }
238             }
239              
240             #get the stynax from a sub-module, and maybe the sub-module will even do the parsing
241             sub highlight
242             {
243 7     7 0 125 my $self = shift;
244             #call with a hash or not
245 7 100       41 my %args = @_ if(@_ % 2 == 0);
246 7         10 my($type,$code,$options);
247 7 50 33     31 if(exists $args{type} && exists $args{code})
248             {
249 0         0 $type = $args{type};
250 0         0 $code = $args{code};
251 0         0 $options = $args{options}; #optional
252             }
253             else
254             {
255 7         10 $type = shift;
256 7         9 $code = shift;
257 7         10 $options = shift; #optional
258             }
259            
260             #check null context
261 7 50       19 return undef unless defined wantarray;
262            
263             #this is not a class method, don't try it
264 7 50       19 return undef unless ref $self;
265            
266             #check if we've loaded this type custom from a file, as it overrides any default option
267 7 50       20 if(exists $self->{_grammars}{$type}) {
268 0         0 $self->{_active} = $self->{_grammars}{$type};
269 0         0 $self->_highlight($code);
270             } else {
271            
272             #this is where the module for this type should be
273             #since this is being require-d, should probably taint check $type a bit
274 7         41 my $grammar = __PACKAGE__ . "::$type";
275            
276             #try to include it
277 7 50       668 eval "require $grammar" or croak "Bad grammar: $@";
278            
279             #clear output
280 7         49 $self->{_output} = '';
281            
282             #check if the module has a highlight method, else just get the syntax from it and use the parser here
283 7 50 66     329 if($grammar->can('highlight') && $options ne 'simple')
    50          
284             {
285 0         0 $grammar->highlight($self, $code, $options);
286             }
287             elsif($grammar->can('syntax'))
288             {
289 7         31 $self->{_active} = $grammar->syntax;
290 7         2598 $self->_highlight($code);
291             }
292             else
293             {
294 0         0 croak "$grammar does not have a highlight or syntax method.";
295             }
296             }
297            
298             #wrap the code in whatever tags
299 7         40 $self->{_output} = sprintf($self->{_wrapper}, $self->{_output});
300            
301 7         22 return $self->output;
302             }
303              
304             #the one that does all the work
305             sub _highlight
306             {
307 7     7   16 my $self = shift;
308 7         15 my $code = shift;
309            
310             #make a hash to store the index of the next occurance of each comment/string/escape delimiter
311 7         13 my %delims;
312            
313 7         25 $delims{ $self->{_active}{escape} } = 1;
314             #check definedness and emptiness in case of ordering oddities in the grammar file
315 7   33     15 defined && ($_ ne '') && ($delims{$_} = 1) for(@{$self->{_active}{quot}});
  7   50     114  
316 7   33     14 defined && ($_ ne '') && ($delims{$_} = 1) for(@{$self->{_active}{lineComment}});
  7   50     60  
317            
318             #a valid open AND close tag is a must to consider a block comment
319 7         13 for(0,1)
320             {
321 14 50 66     102 if(defined $self->{_active}{blockCommentOn}[$_] and
      66        
      33        
322             $self->{_active}{blockCommentOn}[$_] ne '' and
323             defined $self->{_active}{blockCommentOff}[$_] and
324             $self->{_active}{blockCommentOff}[$_] ne '')
325             {
326 6         17 $delims{ $self->{_active}{blockCommentOn}[$_] } = 1;
327             }
328             }
329            
330             #index to the current string location in $code
331 7         12 my $cur = 0;
332            
333             #search for the first occurance of each delimiter
334 7         56 $delims{$_} = index($code, $_, $cur) for(keys %delims);
335            
336             #while some delimiters still remain
337 7   33     42 while(%delims and $cur != -1)
338             {
339             #find the next delimiter and recalculate any passed indexes
340 24         67 my $min = _find_next_delim(\%delims, \$code, $cur);
341            
342             #break out of the loop if it couldn't find a delim
343 24 100       53 last unless(defined($min));
344              
345             #colorize what was before the found comment/string
346 17         72 $self->_keyword(substr($code, $cur, $delims{$min}-$cur));
347            
348             #I realize this is pretty pointless, it's just that in older versions of this
349             #whose code is reused, there was no $min, just a $delim that was pulled from a regex
350             #mnemonically, $delim is the delimiter, and $min is the key to the minimum index
351             #spare the couple bytes for now so I don't have to say $delims{$delim}
352 17         99 my $delim = $min;
353            
354             #move the index of $min past the delimiter itself
355             #it makes for easier reading substr() and index() calls
356             #it gets reset to 0 after each call below, anyway,
357             #so it will get recalculated on the next iteration
358 17         83 $delims{$min} += length($min);
359            
360             #if an escape sequence
361 17 50       55 if($delim eq $self->{_active}{escape})
362             {
363             #pass thru uncolored (might define an 'escape' color sometime)
364             #most escape sequences tend to be in strings, anyway
365             #the original delimiter (escape character) and the one after it are passed
366 0         0 $self->_colorize(undef,$delim.substr($code, $delims{$min}, 1));
367            
368             #move the current index past the character following the escape
369 0         0 $cur = $delims{$min} + 1;
370            
371             #reset escape's next position
372 0         0 $delims{$min} = 0;
373            
374             #find me another delimiter!
375 0         0 next;
376             }
377            
378             #if a quote
379 17 100       22 if(grep { $delim eq $_ } @{$self->{_active}{quot}})
  33         171  
  17         49  
380             {
381             #since a string can contain escape sequences, this if {} block functions
382             #roughly the same as the outer while {} block, but with its own %delim (as %d)
383             #and $min (as $m) and $cur (as $idx)
384            
385             #init %d with whatever quote character got us in here (and may get us out)
386             #and the stored escape character for this language
387 7         64 my %d = ( $delim => 1, $self->{_active}{escape} => 1);
388            
389             #add newline as an escape unless this language support multiline quotes
390 7 100       29 $d{"\n"} = 1 unless($self->{_active}{continueQuote});
391            
392             #the search for the end of the string starts after the starting quote
393 7         11 my $idx = $delims{$min};
394            
395             #search for the first occurance of each delimiter
396 7         54 $d{$_} = index($code, $_, $idx) for(keys %d);
397            
398 7   33     44 while(%d and $idx != -1)
399             {
400             #find the next delimiter
401 8         23 my $m = _find_next_delim(\%d, \$code, $idx);
402            
403             #if it couldn't find any delimter or we found a newline, we couldn't
404             #close the string, so set a negative index and drop out of the loop
405 8 50 33     42 if(!defined($m) || $m eq "\n")
406             {
407 0         0 $idx = -1;
408 0         0 last;
409             }
410            
411             #set after the found delimiter
412 8         16 $d{$m} += length($m);
413            
414             #if esc, set the index past the escape sequence and reset esc's idx
415 8 100       24 if($m eq $self->{_active}{escape})
416             {
417 1         116 $idx = $d{$m} + 1;
418 1         3 $d{$m} = 0;
419             }
420            
421             #if a closing quote, set index to after it and drop from the loop
422 8 100       27 if($m eq $delim)
423             {
424 7         10 $idx = $d{$m};
425 7         13 last;
426             }
427             }
428            
429             #if a suitable closing delimiter was found
430 7 50       16 if($idx != -1)
431             {
432 7         304 $self->_colorize('string',$delim.substr($code, $delims{$min}, $idx-$delims{$min}));
433 7         14 $cur = $idx;
434             }
435             else #couldn't close the quote, just send it on
436             {
437 0         0 $self->_colorize(undef,$delim);
438 0         0 $cur = $delims{$min};
439             }
440 7         13 $delims{$min} = 0;
441 7         40 next;
442             }
443            
444             #check if it starts a line comment
445 10 100       13 if(grep { $delim eq $_ } @{$self->{_active}{lineComment}})
  6         18  
  10         31  
446             {
447             #comment to the next newline
448 4 50       16 if((my $end = index($code, "\n", $delims{$min})) != -1)
449             {
450             #check if we split a windows newline in the source, and move before it
451 4 50       89 $end-- if(substr($code, $end - 1, 1) eq "\r");
452            
453             #if the source is viewed, it'll look prettier if the closing comment tag
454             #is before the newline, so don't move the index past it
455 4         30 $self->_colorize('comment',$delim.substr($code, $delims{$min}, $end-$delims{$min}));
456 4         9 $cur = $end;
457             }
458             else #no newline found, so comment to string end
459             {
460 0         0 $self->_colorize('comment',$delim.substr($code, $delims{$min}));
461 0         0 $cur = -1;
462             }
463 4         10 $delims{$min} = 0;
464 4         24 next;
465             }
466            
467             #something to remember which block comment this is
468 6         8 my $t;
469             #check if it starts a block comment
470 6 50       11 if(grep { ($delim eq $self->{_active}{blockCommentOn}[$_]) && defined($t = $_) }
  6 50       45  
  6         17  
471             (0..$#{$self->{_active}{blockCommentOn}}))
472             {
473             #comment to the closing comment tag
474 6 50       28 if((my $end = index($code, $self->{_active}{blockCommentOff}[$t], $delims{$min})) != -1)
475             {
476             #set end after the closing tag
477 6         14 $end += length($self->{_active}{blockCommentOff}[$t]);
478 6         31 $self->_colorize('comment',$delim.substr($code, $delims{$min}, $end-$delims{$min}));
479 6         13 $cur = $end;
480             }
481             else #no closing tag found, so comment to string end
482             {
483 0         0 $self->_colorize('comment',$delim.substr($code, $delims{$min}));
484 0         0 $cur = -1;
485             }
486 6         7 $delims{$min} = 0;
487 6         34 next;
488             }
489             }
490            
491             #colorize last chunk after all comments and strings if there is one
492 7 50       35 $self->_keyword(substr($code, $cur)) if($cur != -1);
493            
494             # return $self->output;
495             }
496              
497             sub output
498             {
499 7     7 0 13 my $self = shift;
500            
501             #return a two-element list of the marked-up code and the code type's name,
502             #or just the marked-up code itself, depending on context
503             #return wantarray ? ($self->{_output}, $self->{_active}{name}) : $self->{_output};
504            
505             #the above was useful when code's extention was passed, but now since module names
506             #are passed, I assume those will be pretty descriptive, and this name method isn't needed.
507             #Likely it'll just cause problems with people unexpected using list context (like print)
508 7         64 return $self->{_output};
509             }
510              
511             sub _find_next_delim
512             {
513             #hash-ref, scalar-ref (could be a big scalar), scalar
514 32     32   49 my($delims, $code, $cur) = @_;
515 32         34 my $min;
516 32         82 for(keys %$delims)
517             {
518             #find a new index for those not after the current "start" position
519 83 100       206 $delims->{$_} = index($$code, $_, $cur) if($delims->{$_} < $cur);
520            
521             #doesn't exist in the remaining code, don't touch it again
522 83 100       161 if($delims->{$_} == -1)
523             {
524 39         58 delete $delims->{$_};
525 39         80 next;
526             }
527            
528             #if min is not defined or min is less than new delim, set to new
529 44 100 100     229 $min = $_ if(!defined($min) or $delims->{$_} < $delims->{$min});
530             }
531 32         91 return $min;
532             }
533              
534             sub _simple_html_escape
535             {
536 231     231   273 my $code = shift;
537            
538             #escape the only three characters that "really" matter for displaying html
539 231         293 $code =~ s/&/&/g;
540 231         285 $code =~ s/
541 231         468 $code =~ s/>/>/g;
542              
543 231         487 return $code;
544             }
545              
546             sub _colorize
547             {
548 231     231   430 my ($self, $type, $what) = @_;
549            
550             #do any escaping of characters before appending to output
551 231         503 $what = &{$self->{_escape}}($what);
  231         550  
552            
553             #check if type is defined. Append type's class, else just the bare text
554 231 100       1624 $self->{_output} .= defined($type) ? sprintf($self->{_format}, $self->{_colors}{$type}, $what) : $what;
555             }
556              
557             sub _keyword
558             {
559 24     24   69 my ($self, $code) = @_;
560              
561             #escape all the delimiters that need to be and dump in char class
562 24         58 my $d = quotemeta $self->{_active}{delimiters};
563            
564             #save the pattern so it doesn't compile each time (whitespace is considered a delim, too)
565 24         723 my $re = qr/\G(.*?)([$d\s]+)/s;
566            
567             #could help, in theory, but it doesn't seem to help at all when doing
568             #repeated m//g global searches with position anchors defeats the point of study()
569             #study($code);
570            
571 24   66     327 while($code =~ /$re/gc || #search for a delimiter (don't reset pos on fail)
572             $code =~ /\G(.+)/sg) #grab what's left in the string if there's no delim
573             {
574             #before the delimiter
575 107         207 my $chunk = $1;
576            
577             #the delimiter(s), or empty if no more delims
578 107 50       238 my $delim = defined($2) ? $2 : undef;
579            
580             #remember if we actually did anything
581 107         170 my $done = 0;
582            
583             #find which key group, if any, this chunk falls under
584             #start at 1 and work up
585 107         109 my $key = 1;
586            
587             #check if this key group exists for this language
588 107         369 while(exists $self->{_active}{"key$key"})
589             {
590 219 100       520 my $check = ($self->{_active}{case}) ? $chunk : lc($chunk);
591            
592             #check if this chunk exists for this keygroup
593 219 100       803 if(exists $self->{_active}{"key$key"}{$check})
594             {
595             #colorize it as this group, set done/found and exit loop
596 50         144 $self->_colorize("key$key",$chunk);
597 50         81 $done = 1;
598 50         67 last;
599             }
600            
601             #nope, not this key group, maybe next
602 169         423 $key++;
603             }
604            
605             #I had a much better "number" regex, but it was probably perl-specific and this should do
606 107 100       425 if($chunk =~ /^\d*\.?\d+$/)
607             {
608 1         4 $self->_colorize('number',$chunk);
609 1         2 $done = 1;
610             }
611            
612             #if the chunk didn't match a pattern above, it's nothing and gets no color but default
613 107 100       253 $self->_colorize(undef,$chunk) unless($done);
614            
615             #dump the delimiter to output, too, without color
616 107 50       302 $self->_colorize(undef,$delim) if(defined($delim));
617             }
618             }
619              
620             #load syntax from a separate grammar file
621             sub get_syntax
622             {
623 0     0 0   my $self = shift;
624 0 0         my %args = @_ if(@_ % 2 == 0);
625 0           my($type,$grammar,$format,$force);
626 0 0 0       if(exists $args{type} && exists $args{grammar})
627             {
628 0           $type = $args{type};
629 0           $grammar = $args{grammar};
630 0           $format = $args{format};
631 0           $force = $args{force};
632             }
633             else
634             {
635 0           $type = shift;
636 0           $grammar = shift;
637 0           $format = shift;
638 0           $force = shift;
639             }
640            
641 0 0         unless($type) {
642 0           cluck "You must specify a type.\n";
643 0           return undef;
644             }
645            
646             #check if syntax for this type is already loaded and reload isn't forced
647 0 0 0       return $self->{_grammars}{$type} if(!$force && exists $self->{_grammars}{$type});
648            
649 0 0         unless($grammar) {
650 0           cluck "No grammar for '$type' found.\n";
651 0           return undef;
652             }
653            
654             #check if a hashref was passed in instead of a filename
655 0 0         if(ref $grammar eq 'HASH') {
656 0           $self->{_grammars}{$type} = $grammar;
657 0           return $grammar;
658             }
659            
660             #holds the grammar structure
661             #initialize and set common defaults in case of incomplete grammar
662 0           my %syntax = (
663             name => 'Unknown-type',
664             escape => '\\',
665             case => 1,
666             continueQuote => 0,
667             blockCommentOn => [],
668             lineComment => [],
669             quot => [],
670             );
671             #attempt to open grammar file
672 0 0         open FH, $grammar or croak "Cannot open '$grammar' to find syntax for '$type': $!";
673            
674 0 0         if($format eq 'editplus') {
    0          
675 0           _get_syntax_editplus(\%syntax, \*FH);
676             }
677             elsif($format eq 'ultraedit') {
678 0           _get_syntax_ultraedit(\%syntax, \*FH);
679             }
680             #else return a non-function yet parsable %syntax, might be desired?
681            
682 0           close FH;
683 0           $self->{_grammars}{$type} = \%syntax;
684            
685             #dump the syntax table to stderr (less screen space than Data::Dumper)
686             #print STDERR "$_ : ".((ref $syntax{$_} eq 'HASH') ? join(' | ', keys %{$syntax{$_}}) : (ref $syntax{$_} eq 'ARRAY') ? join(' | ', @{$syntax{$_}}) : $syntax{$_})."\n" for(keys %syntax);
687              
688 0           return $self->{_grammars}{$type};
689             }
690              
691             sub _get_syntax_editplus
692             {
693 0     0     my $syntax = shift;
694 0           my $FH = shift;
695            
696             #make sure we break on newlines
697 0           local $/ = "\n";
698              
699 0           my $key = 1;
700              
701 0           while(<$FH>)
702             {
703             #comment and blank lines ignored
704 0 0 0       next if(/^;/ || !/./);
705              
706             #search for each type
707 0 0         $syntax->{name} = $1 if(/^\#TITLE=(.+?)$/i);
708 0 0         $syntax->{delimiters} = $1 if(/^\#DELIMITER=(.+?)$/i);
709 0 0         $syntax->{escape} = $1 if(/^\#ESCAPE=(.+?)$/i);
710 0 0         $syntax->{case} = 0 if(/^\#CASE=n$/i);
711 0 0         $syntax->{case} = 1 if(/^\#CASE=y$/i);
712 0 0         $syntax->{continueQuote} = 0 if(/^\#CONTINUE_QUOTE=n$/i);
713 0 0         $syntax->{continueQuote} = 1 if(/^\#CONTINUE_QUOTE=y$/i);
714              
715 0 0         $syntax->{blockCommentOn}[0] = $1 if(/^\#COMMENTON=(.+?)$/i);
716 0 0         $syntax->{blockCommentOff}[0] = $1 if(/^\#COMMENTOFF=(.+?)$/i);
717 0 0         $syntax->{blockCommentOn}[1] = $1 if(/^\#COMMENTON2=(.+?)$/i);
718 0 0         $syntax->{blockCommentOff}[1] = $1 if(/^\#COMMENTOFF2=(.+?)$/i);
719              
720 0 0         push @{$syntax->{lineComment}}, $1 if(/^\#LINECOMMENT\d?=(.+?)$/i);
  0            
721 0 0         push @{$syntax->{quot}}, $1 if(/^\#QUOTATION\d?=(.+?)$/i);
  0            
722              
723 0 0 0       if(/^\#KEYWORD/ && $key <= $KEYMAX)
724             {
725 0   0       while(defined($_ = <$FH>) && !/^\#/)
726             {
727             #comment and blank lines ignored
728 0 0 0       next if(/^;/ || !/./);
729 0           chomp;
730              
731             #the escape character is ^ and possible escape sequences are ^^ ^; ^#
732 0           s/\^([;^#])/$1/g;
733              
734             #save the literal if case sensitive, else lc it as key
735 0 0         if($syntax->{case}){
736 0           $syntax->{"key$key"}{$_} = $_;
737             } else {
738 0           $syntax->{"key$key"}{lc($_)} = $_;
739             }
740             }
741 0           $key++; #for next potential key group
742 0 0         redo unless(eof); #back to the top of the while without hitting again, assuming not EOF
743             }
744             }
745             }
746              
747             sub _get_syntax_ultraedit
748             {
749 0     0     my $syntax = shift;
750 0           my $FH = shift;
751            
752             #make sure we break on newlines
753 0           local $/ = "\n";
754            
755 0           while(<$FH>)
756             {
757 0 0         $syntax->{name} = $1 if(/^\/L\d+"(.+?)"/i);
758 0 0         $syntax->{escape} = $1 if(/Escape Char = (\S+)/);
759 0 0         $syntax->{case} = 0 if(/Nocase/);
760 0 0         push @{$syntax->{quot}}, split //, $1 if(/String Chars = (\S{1,2})/);
  0            
761            
762 0 0         $syntax->{blockCommentOn}[0] = $1 if(/Block Comment On = (\S{1,5})/);
763 0 0         $syntax->{blockCommentOff}[0] = $1 if(/Block Comment Off = (\S{1,5})/);
764 0 0         $syntax->{blockCommentOn}[1] = $1 if(/Block Comment On Alt = (\S{1,5})/);
765 0 0         $syntax->{blockCommentOff}[1] = $1 if(/Block Comment Off Alt = (\S{1,5})/);
766            
767 0 0         push @{$syntax->{lineComment}}, $1 if(/Line Comment (?:Alt )?= (\S{1,5})/);
  0            
768 0 0         $syntax->{delimiters} = $1 if(/^\/Delimiters = (.+)$/i);
769            
770 0           my($key) = /^\/C(\d+)(?:".+")?$/;
771 0 0 0       if($key && $key <= $KEYMAX)
772             {
773             #any non-escape line
774 0   0       while(defined($_ = <$FH>) && !/^\/(?!\/)/)
775             {
776 0           chomp;
777              
778             #escape is a line that starts with //, allows the line to contain / in keywords
779 0           s/^\/\///;
780              
781             #keywords are whitespace delimited, and ignore the empty strings with truth test
782 0           for(grep $_, split /\s+/)
783             {
784             #save the literal if case sensitive, else lc it as key
785 0 0         if($syntax->{case}){
786 0           $syntax->{"key$key"}{$_} = $_;
787             } else {
788 0           $syntax->{"key$key"}{lc($_)} = $_;
789             }
790             }
791             }
792 0 0         redo unless(eof); #back to the top of the while without hitting again, assuming not EOF
793             }
794             }
795            
796             # UE has both quotes enabled by default, so if none were defined, use them
797 0 0         @{$syntax->{quot}} or push @{$syntax->{quot}}, qw/' "/;
  0            
  0            
798             }
799              
800             1;
801              
802             __END__