File Coverage

blib/lib/CSS/SAC.pm
Criterion Covered Total %
statement 263 464 56.6
branch 132 276 47.8
condition 27 64 42.1
subroutine 27 33 81.8
pod 18 19 94.7
total 467 856 54.5


line stmt bran cond sub pod time code
1            
2             ###
3             # CSS::SAC - a SAC implementation for Perl
4             # Robin Berjon
5             # 17/08/2001 - bugfixes...
6             # 23/04/2001 - more enhancements
7             # 19/03/2001 - second version, various suggestions and enhancements
8             # 24/02/2001 - prototype mark I of the new model
9             ###
10            
11             package CSS::SAC;
12 2     2   2098 use strict;
  2         4  
  2         100  
13 2         306 use vars qw(
14             $VERSION
15             $RE_STRING
16             $RE_NAME
17             $RE_IDENT
18             $RE_RANGE
19             $RE_NUM
20             %DIM_MAP
21             %FUNC_MAP
22 2     2   10 );
  2         4  
23             $VERSION = '0.08';
24            
25 2     2   1544 use CSS::SAC::ConditionFactory qw();
  2         6  
  2         50  
26 2     2   1903 use CSS::SAC::SelectorFactory qw();
  2         9  
  2         59  
27 2     2   1405 use CSS::SAC::LexicalUnit qw(:constants);
  2         5  
  2         13  
28 2     2   112 use CSS::SAC::Selector::Sibling qw(:constants);
  2         6  
  2         19  
29 2     2   1385 use CSS::SAC::SelectorList qw();
  2         6  
  2         41  
30            
31 2     2   3000 use Text::Balanced qw();
  2         354815  
  2         120  
32            
33 2     2   18 use constant DEBUG => 0;
  2         3  
  2         941  
34            
35             #---------------------------------------------------------------------#
36             # build a few useful regexen and maps
37             #---------------------------------------------------------------------#
38            
39             # matches a quoted string
40             $RE_STRING = Text::Balanced::gen_delimited_pat(q{'"}); #"
41             $RE_STRING = qr/$RE_STRING/s;
42            
43             # matches a name token
44             $RE_NAME = qr/
45             (?:(?:\\(?:(?:[a-fA-F0-9]{1,6}[\t\x20])|[\x32-\xff]))|[a-zA-Z\x80-\xff0-9-])+
46             /xs;
47            
48             # matches a valid CSS ident (this may be wrong, needs testing)
49             $RE_IDENT = qr/
50             (?:(?:\\(?:(?:[a-fA-F0-9]{1,6}[\t\x20])|[ \x32-\xff]))|[a-zA-Z\x80-\xff])
51             (?:(?:\\(?:(?:[a-fA-F0-9]{1,6}[\t\x20])|[ \x32-\xff]))|[a-zA-Z\x80-\xff0-9_-])*
52             /xs;
53            
54             # matches a unicode range
55             $RE_RANGE = qr/(?:
56             (?:U\+)
57             (?:
58             (?:[0-9a-fA-F]{1,6}-[0-9a-fA-F]{1,6})
59             |
60             (?:\?{1,6})
61             |
62             (?:[0-9a-fA-F](?:
63             (?:\?{0,5}|[0-9a-fA-F])(?:
64             (?:\?{0,4}|[0-9a-fA-F])(?:
65             (?:\?{0,3}|[0-9a-fA-F])(?:
66             (?:\?{0,2}|[0-9a-fA-F])(?:
67             (?:\?{0,1}|[0-9a-fA-F])))))))
68             )
69             )
70             /xs;
71            
72            
73             # matches a number
74             $RE_NUM = qr/(?:(?:[0-9]*\.[0-9]+)|(?:[0-9]+))/;
75            
76            
77             # maps a dimension unit (length, angle, time, frequency, percentage) to its constant
78             %DIM_MAP = (
79             em => EM,
80             ex => EX,
81             px => PIXEL,
82             cm => CENTIMETER,
83             mm => MILLIMETER,
84             in => INCH,
85             pt => POINT,
86             pc => PICA,
87             deg => DEGREE,
88             rad => RADIAN,
89             grad => GRADIAN,
90             ms => MILLISECOND,
91             s => SECOND,
92             hz => HERTZ,
93             khz => KILOHERTZ,
94             '%' => PERCENTAGE,
95             );
96            
97             # maps a CSS function name to its constant
98             %FUNC_MAP = (
99             attr => ATTR,
100             counter => COUNTER_FUNCTION,
101             counters => COUNTERS_FUNCTION,
102             rect => RECT_FUNCTION,
103             url => URI,
104             rgb => RGBCOLOR,
105             );
106            
107             #---------------------------------------------------------------------#
108            
109            
110             #---------------------------------------------------------------------#
111             # build the fields for an array based object
112             #---------------------------------------------------------------------#
113 2         23 use Class::ArrayObjects define => {
114             fields => [qw(
115             _cf_
116             _sf_
117             _dh_
118             _eh_
119             _dh_can_
120             _allow_charset_
121             _ns_map_
122             _tmp_media_
123             )],
124 2     2   16 };
  2         4  
125             #---------------------------------------------------------------------#
126            
127            
128            
129            
130             ### Constructor #######################################################
131             # #
132             # #
133            
134            
135             #---------------------------------------------------------------------#
136             # CSS::SAC->new(\%options)
137             # creates a new sac parser
138             #---------------------------------------------------------------------#
139             sub new {
140 2     2 1 88 my $class = shift;
141 2   100     18 my $options = shift || {};
142            
143             # set our options
144 2         8 my $self = bless [], $class;
145 2   33     35 $self->[_cf_] = $options->{ConditionFactory} || CSS::SAC::ConditionFactory->new;
146 2   33     32 $self->[_sf_] = $options->{SelectorFactory} || CSS::SAC::SelectorFactory->new;
147 2   33     23 $self->[_eh_] = $options->{ErrorHandler} || CSS::SAC::DefaultErrorHandler->new;
148 2         7 $self->[_dh_can_] = {};
149 2 100       18 $self->DocumentHandler($options->{DocumentHandler}) if $options->{DocumentHandler};
150            
151 2         9 return $self;
152             }
153             #---------------------------------------------------------------------#
154            
155            
156             # #
157             # #
158             ### Constructor #######################################################
159            
160            
161            
162             ### Accessors #########################################################
163             # #
164             # #
165            
166            
167             #---------------------------------------------------------------------#
168             # $sac->ParserVersion
169             # returns the supported CSS version
170             #---------------------------------------------------------------------#
171             sub ParserVersion {
172             # IMP
173             # this should perhaps return http://www.w3.org/TR/REC-CSS{1,2,3}
174             # as per http://www.w3.org/TR/SAC, but it's tricky for CSS3 which
175             # is modularized
176 1     1 0 17 return 'CSS3';
177             }
178             #---------------------------------------------------------------------#
179             *CSS::SAC::getParserVersion = \&ParserVersion;
180            
181            
182             #---------------------------------------------------------------------#
183             # my $cf = $sac->ConditionFactory
184             # $sac->ConditionFactory($cf)
185             # get/set the ConditionFactory that we use
186             #---------------------------------------------------------------------#
187             sub ConditionFactory {
188 1 50   1 1 17 (@_==2) ? $_[0]->[_cf_] = $_[1] :
189             $_[0]->[_cf_];
190             }
191             #---------------------------------------------------------------------#
192             *CSS::SAC::setConditionFactory = \&ConditionFactory;
193            
194            
195             #---------------------------------------------------------------------#
196             # my $sf = $sac->SelectorFactory
197             # $sac->SelectorFactory($sf)
198             # get/set the SelectorFactory that we use
199             #---------------------------------------------------------------------#
200             sub SelectorFactory {
201 1 50   1 1 16 (@_==2) ? $_[0]->[_sf_] = $_[1] :
202             $_[0]->[_sf_];
203             }
204             #---------------------------------------------------------------------#
205             *CSS::SAC::setSelectorFactory = \&SelectorFactory;
206            
207            
208             #---------------------------------------------------------------------#
209             # my $dh = $sac->DocumentHandler
210             # $sac->DocumentHandler($dh)
211             # get/set the DocumentHandler that we use
212             #---------------------------------------------------------------------#
213             sub DocumentHandler {
214 1     1 1 2 my $sac = shift;
215 1         1 my $dh = shift;
216            
217             # set the doc handler, and see what it can do
218 1 50       5 if ($dh) {
219 1         2 $sac->[_dh_] = $dh;
220            
221 1         8 my @dh_methods = qw(
222             comment
223             charset
224             end_document
225             end_font_face
226             end_media
227             end_page
228             end_selector
229             ignorable_at_rule
230             import_style
231             namespace_declaration
232             property
233             start_document
234             start_font_face
235             start_media
236             start_page
237             start_selector
238             );
239 1         3 for my $meth (@dh_methods) {
240 16         89 $sac->[_dh_can_]->{$meth} = $dh->can($meth);
241             }
242             }
243            
244 1         4 return $sac->[_dh_];
245             }
246             #---------------------------------------------------------------------#
247             *CSS::SAC::setDocumentHandler = \&DocumentHandler;
248            
249            
250             #---------------------------------------------------------------------#
251             # my $eh = $sac->ErrorHandler
252             # $sac->ErrorHandler($eh)
253             # get/set the ErrorHandler that we use
254             #---------------------------------------------------------------------#
255             sub ErrorHandler {
256 1 50   1 1 17 (@_==2) ? $_[0]->[_eh_] = $_[1] :
257             $_[0]->[_eh_];
258             }
259             #---------------------------------------------------------------------#
260             *CSS::SAC::setErrorHandler = \&ErrorHandler;
261            
262            
263             # #
264             # #
265             ### Accessors #########################################################
266            
267            
268            
269            
270             ### Parsing Methods ###################################################
271             # #
272             # #
273            
274            
275             #---------------------------------------------------------------------#
276             # $sac->parse(\%options)
277             # parses a style sheet
278             #---------------------------------------------------------------------#
279             sub parse {
280 1     1 1 40 my $sac = shift;
281 1         3 my $options = shift;
282            
283             # we always load the style sheet into memory because style sheets
284             # are usually small. If this is a problem we'll change that.
285 1         2 my $css;
286 1 50       4 if ($options->{string}) {
    0          
    0          
287 1         3 $css = $options->{string};
288             }
289             elsif ($options->{ioref}) {
290 0         0 my $io = $options->{ioref};
291 0         0 local $/ = undef;
292 0         0 $css = <$io>;
293 0         0 close $io;
294             }
295             elsif ($options->{filename}) {
296 0 0       0 open CSS, "$options->{filename}" or die $!;
297 0         0 local $/ = undef;
298 0         0 $css = <CSS>;
299 0         0 close CSS;
300             }
301             else {
302 0         0 return undef;
303             }
304            
305             ### look at the other options
306             # charsets are forbidden in embedded style sheets
307 1 50       3 if ($options->{embedded}) {
308 0         0 $sac->[_allow_charset_] = 0;
309             }
310             else {
311 1         3 $sac->[_allow_charset_] = 1;
312             }
313            
314            
315            
316             #---> Start Parsing <---------------------------------------------#
317            
318             # start doc
319 1         2 warn "[SAC] start parsing\n" if DEBUG;
320 1 50       5 $sac->[_dh_]->start_document if $sac->[_dh_can_]->{start_document};
321            
322             # before anything else occurs there can be a charset
323 1         2 warn "[SAC] parsing charset\n" if DEBUG;
324 1         5 $sac->parse_charset(\$css);
325 1         4 $sac->[_allow_charset_] = 0;
326            
327             # remove a possible HTML open comment (not reported to handler)
328 1         1 warn "[SAC] removing HTML comments\n" if DEBUG;
329 1         6 $css =~ s/^\s*\s*//) {
427             # we don't do anything with those presently
428 0         0 warn "[SAC] removing HTML comments\n" if DEBUG;
429             }
430            
431             # we have selectors
432             elsif (my $sel_list = $sac->parse_selector_list(\$css)) {
433 44         43 warn "[SAC] parsed selectors\n" if DEBUG;
434 44 50       95 next unless @$sel_list;
435             # callbacks
436 44 50       250 $sac->[_dh_]->start_selector($sel_list) if $sac->[_dh_can_]->{start_selector};
437            
438             # parse the rule
439 44         347 my $rule;
440 44         39 warn "[SAC] parsing rule\n" if DEBUG;
441            
442             ### BUG
443             # The Text::Balanced extractions below are not correct since they don't take
444             # comments into account. With the first one, it'll fail on apostrophes in
445             # comments, with the latter, on unbalanced apos and } in comments and
446             # apos-strings. The latter is used currently because it is less likely to fail,
447             # but what is needed is a real parser that steps inside the black parsing out
448             # comments and property values. The good news is that we have most of the bits
449             # to do that right already.
450            
451             #($rule,$css,undef) = Text::Balanced::extract_bracketed($css,q/{}'"/,qr/\s*/); #"
452 44         255 ($rule,$css,undef) = Text::Balanced::extract_bracketed($css,q/{}"/,qr/\s*/); #"
453 44         5523 $sac->parse_rule(\$rule);
454            
455             # end of the rule
456 44 50       374 $sac->[_dh_]->end_selector($sel_list) if $sac->[_dh_can_]->{end_selector};
457             }
458            
459             # trailing whitespace, should only happen at the very end
460             elsif ($css =~ s/^\s+//) {
461             # do nothing
462 0         0 warn "[SAC] just whitespace\n" if DEBUG;
463             }
464            
465             # error
466             else {
467 1 50       5 last if ! length $css;
468 0         0 $sac->[_eh_]->fatal_error('Unknown trailing tokens in style sheet: "' . $css . '"');
469 0         0 last;
470             }
471             }
472            
473             # end doc
474 1         3 warn "[SAC] end of document\n" if DEBUG;
475 1 50       0 $sac->[_dh_]->end_document if $sac->[_dh_can_]->{end_document};
476            
477             #---> Finish Parsing <--------------------------------------------#
478            
479             }
480             #---------------------------------------------------------------------#
481             *CSS::SAC::parseStyleSheet = \&parse;
482            
483            
484             #---------------------------------------------------------------------#
485             # $sac->parse_charset($string_ref)
486             # parses a charset
487             #---------------------------------------------------------------------#
488             sub parse_charset {
489 1     1 1 10 my $sac = shift;
490 1         2 my $css = shift;
491            
492             # we don't remove leading ws, the charset must come first
493 1 50       7 return unless $$css =~ s/^\@charset\s+//i;
494            
495             # extract the string
496 0 0       0 if ($$css =~ s/^($RE_STRING)\s*;//) {
497 0         0 my $charset = $1;
498 0         0 $charset =~ s/^(?:'|")//; #"
499 0         0 $charset =~ s/(?:'|")$//; #'
500            
501 0 0       0 $sac->[_dh_]->charset($charset) if $sac->[_dh_can_]->{charset};
502             }
503             else {
504 0 0       0 if ($$css =~ s/[^;]*;//) {
505 0         0 $sac->[_eh_]->warning('Unknown token in charset declaration');
506             }
507             else {
508 0         0 $sac->[_eh_]->fatal_error('Unknown token in charset declaration');
509             }
510             }
511             }
512             #---------------------------------------------------------------------#
513            
514            
515             #---------------------------------------------------------------------#
516             # $sac->parse_imports($string_ref)
517             # parses import rules at the beginning
518             #---------------------------------------------------------------------#
519             sub parse_imports {
520 1     1 1 2 my $sac = shift;
521 1         2 my $css = shift;
522            
523            
524             # we may have several imports separated by comments
525 1         7 while ($$css =~ s/^\s*\@import\s+//i) {
526             # first get the uri
527 0         0 my $uri;
528 0 0       0 if ($$css =~ s/^url\(//) {
529 0         0 $$css =~ s/^((?:$RE_STRING)|([^\)]*))\s*//;
530 0         0 $uri = $1;
531 0         0 $uri =~ s/^(?:'|")//; # "
532 0         0 $uri =~ s/(?:'|")$//; # '
533 0         0 $$css =~ s/^\)//;
534             }
535             else {
536 0         0 $$css =~ s/^($RE_STRING)//;
537 0         0 $uri = $1;
538 0         0 $uri =~ s/^(?:'|")//; #"
539 0         0 $uri =~ s/(?:'|")$//; #'
540             }
541            
542             # a possible medialist
543 0         0 my $medialist = $sac->parse_medialist($css);
544            
545             # we must have a terminating token now
546 0 0       0 if ($$css =~ s/^\s*;//) {
547 0 0       0 $sac->[_dh_]->import_style($uri,$medialist) if $sac->[_dh_can_]->{import_style};
548             }
549             else {
550 0 0       0 if ($$css =~ s/[^;]*;//) {
551 0         0 $sac->[_eh_]->warning('Unknown token in import rule');
552             }
553             else {
554 0         0 $sac->[_eh_]->fatal_error('Unknown token in import rule');
555             }
556             }
557            
558             # remove comments and run again
559 0         0 $sac->parse_comments($css);
560             }
561             }
562             #---------------------------------------------------------------------#
563            
564            
565             #---------------------------------------------------------------------#
566             # $sac->parse_namespace_declarations($string_ref)
567             # parses ns declarations
568             #---------------------------------------------------------------------#
569             sub parse_namespace_declarations {
570 1     1 1 2 my $sac = shift;
571 1         2 my $css = shift;
572            
573             # we may have several ns decls separated by comments
574 1         15 while ($$css =~ s/^\s*\@namespace\s+//i) {
575 1         1 my ($prefix,$uri);
576             # first get the prefix
577 1 50 33     113 if ($$css !~ /^url\(/ and $$css =~ s/^($RE_IDENT)\s+//) {
578 1         4 $prefix = $1;
579             }
580            
581             # then get the uri
582 1 50       14 if ($$css =~ s/^url\(//) {
583 1         84 $$css =~ s/^((?:$RE_STRING)|([^\)]*))\s*//;
584 1         6 $uri = $1;
585 1         5 $uri =~ s/^(?:'|")//; # "
586 1         6 $uri =~ s/(?:'|")$//; # '
587 1         5 $$css =~ s/^\)//;
588             }
589             else {
590 0         0 $$css =~ s/^($RE_STRING)//;
591 0         0 $uri = $1;
592 0         0 $uri =~ s/^(?:'|")//; #"
593 0         0 $uri =~ s/(?:'|")$//; #'
594             }
595            
596             # we must have a terminating token now
597 1 50       6 if ($$css =~ s/^\s*;//) {
598             # store the prefix-ns in our ns map
599 1         3 my $map_prefix = $prefix;
600 1 50       3 $map_prefix = '#default' unless $prefix;
601 1         5 $sac->[_ns_map_]->{$map_prefix} = $uri;
602            
603             # throw a callback
604 1   50     4 $prefix ||= '';
605 1 50       8 $sac->[_dh_]->namespace_declaration($prefix,$uri) if $sac->[_dh_can_]->{namespace_declaration};
606             }
607             else {
608 0 0       0 if ($$css =~ s/[^;]*;//) {
609 0         0 $sac->[_eh_]->warning('Unknown token in namespace declaration');
610             }
611             else {
612 0         0 $sac->[_eh_]->fatal_error('Unknown token in namespace declaration');
613             }
614             }
615            
616             # remove comments and run again
617 1         66 $sac->parse_comments($css);
618             }
619             }
620             #---------------------------------------------------------------------#
621            
622            
623             #---------------------------------------------------------------------#
624             # $sac->parse_medialist($string_ref)
625             # parses a list of media values
626             # returns that list as an arrayref
627             #---------------------------------------------------------------------#
628             sub parse_medialist {
629 0     0 1 0 my $sac = shift;
630 0         0 my $css = shift;
631            
632             # test for the right content and return a list if found
633 0 0       0 return [] unless $$css =~ s/^\s*($RE_IDENT(?:\s*,\s*$RE_IDENT)*)//;
634 0         0 return [map { $_ =~ s/^\s+//; $_ =~ s/\s+$//; $_; } split /,/, $1];
  0         0  
  0         0  
  0         0  
635             }
636             #---------------------------------------------------------------------#
637            
638            
639             #---------------------------------------------------------------------#
640             # $sac->parse_comments($string_ref)
641             # parses as many comments as there are at the beginning of the string
642             #---------------------------------------------------------------------#
643             sub parse_comments {
644 284     284 1 346 my $sac = shift;
645 284         292 my $css = shift;
646            
647             # we may have several comments in a row
648 284         268 my $ws;
649 284         914 while ($$css =~ s|^(\s*)/\*||) {
650 0         0 $ws .= $1;
651 0 0       0 if ($$css =~ s{^((?:(?:\\\\)|(?:\\[^\*])|(?:\\\*)|[^\\])*?)\*/}{}) {
652 0 0       0 $sac->[_dh_]->comment($1) if $sac->[_dh_can_]->{comment};
653             }
654             else {
655 0 0       0 if ($$css =~ s/.*\*\///) {
656 0         0 $sac->[_eh_]->warning('Strange comment token, guessing the parse');
657             }
658             else {
659 0         0 $sac->[_eh_]->fatal_error('Unterminated comment: unrecoverable');
660             }
661             }
662             }
663            
664             # we need to keep the whitespace around for certain
665             # occurrences of comments, it may be significant
666 284 50 33     753 $$css = $ws . $$css if defined $ws and defined $$css;
667             }
668             #---------------------------------------------------------------------#
669            
670            
671             #---------------------------------------------------------------------#
672             # $sac->parse_selector_list($string_ref)
673             # parses a list of selectors
674             # returns an array ref of selectors
675             #---------------------------------------------------------------------#
676             sub parse_selector_list {
677 45     45 1 65 my $sac = shift;
678 45         60 my $css = shift;
679            
680             # this is a long and hairy process
681 45         42 my @sels;
682 45         185 $$css =~ s/^\s*//;
683 45         63 while (1) {
684            
685             # we've reached the rule, or there isn't anything left to parse
686 89 100       417 if ($$css =~ m/^\s*\{/) {
    100          
    50          
687 44 50       142 if (!@sels) {
688 0         0 @sels = ($sac->[_sf_]->create_element_selector(undef,undef));
689             }
690 44         58 last;
691             }
692            
693             elsif (!length $$css) {
694 1         2 last;
695             }
696            
697             # a simple selector
698             elsif (my $sel = $sac->parse_simple_selector($css)) {
699 44         68 push @sels, $sel;
700            
701             # delete the rest
702 44         81 $sac->parse_comments($css);
703 44         92 $$css =~ s/^\s*,\s*//;
704 44         83 $sac->parse_comments($css);
705 44         238 $$css =~ s/^\s*//;
706             }
707            
708             # error
709             else {
710             # something wrong must have happened
711 0 0       0 if ($$css =~ s/[^{]*//) {
712 0         0 $sac->[_eh_]->warning('Unknown token in selector list');
713             }
714             else {
715 0         0 $sac->[_eh_]->fatal_error('Unknown token in selector list');
716             }
717             }
718             }
719            
720 45 100       93 return unless @sels; # this returns nothing, there were no selectors (needed for parse)
721 44         230 return CSS::SAC::SelectorList->new(\@sels);
722             }
723             #---------------------------------------------------------------------#
724             *CSS::SAC::parseSelectors = \&parse_selector_list;
725            
726            
727             #---------------------------------------------------------------------#
728             # $sac->parse_simple_selector($string_ref)
729             # parses a simple selector
730             # returns the selector object
731             #---------------------------------------------------------------------#
732             sub parse_simple_selector {
733 46     46 1 65 my $sac = shift;
734 46         46 my $css = shift;
735            
736 46         166 $$css =~ s/^\s*//;
737            
738             ### eat the content piece by piece
739 46         55 my @tokens;
740            
741 46         45 my ($attr,$func,$args);
742 46         48 while (1) {
743            
744 105         237 $sac->parse_comments($css);
745            
746             # end of selector
747 105 100 100     1898 if ($$css =~ m/^\s*(?:,|{)/ or !length $$css) {
    100 66        
    100 66        
    100          
    100          
    100          
    100          
    100          
    100          
    50          
748 46         111 last;
749             }
750            
751             # element
752             elsif ($$css =~ s/^(?:($RE_IDENT|\*)?(\|))?($RE_IDENT|\*)//) {
753             # create element selector
754 19         21 my ($ns,$lname);
755 19 100       66 $lname = ($3 eq '*')?undef:$3;
756 19 100 66     66 if (defined $2 and $2 eq '|') {
757 2 50       9 if (!$1) {
    50          
758 0         0 $ns = ''; # |E matches elements in no namespace
759             }
760             elsif ($1 eq '*') {
761 0         0 $ns = undef; # undef means all, '' means default
762             }
763             else {
764 2         7 $ns = $sac->[_ns_map_]->{$1};
765             }
766             }
767             else {
768             # E matches elements in the default namespace or
769             # any namespace if no default namespace is declared
770 17   50     92 $ns = $sac->[_ns_map_]->{'#default'} || undef;
771             }
772            
773             # push it
774 19         66 push @tokens, $sac->[_sf_]->create_element_selector($ns,$lname);
775             }
776            
777             # hash id
778             elsif ($$css =~ s/^#($RE_NAME)//) {
779 2         10 push @tokens, $sac->[_cf_]->create_id_condition($1);
780             }
781            
782             # dot class
783             elsif ($$css =~ s/^\.($RE_IDENT)//) {
784 2         13 push @tokens, $sac->[_cf_]->create_class_condition(undef,$1);
785             }
786            
787             # CSS3 pseudo-elements
788             elsif ($$css =~ s/^::($RE_IDENT)//) {
789 2         12 push @tokens, $sac->[_sf_]->create_pseudo_element_selector(undef,$1);
790             }
791            
792             # [attr]
793             elsif (
794             (($attr,$$css,undef) = Text::Balanced::extract_bracketed($$css,q/[]'"/,qr/\s*/))
795             and
796             length $attr
797             ) {
798 9         1475 $attr =~ s/^\[\s*//;
799 9         45 $attr =~ s/\s*\]$//;
800            
801             # get the attr lname and ns
802 9         217 $attr =~ s/^(?:($RE_IDENT|\*)?(\|))?($RE_IDENT|\*)//;
803 9         18 my ($ns,$lname);
804 9 50       27 $lname = ($3 eq '*')?undef:$3;
805 9 100 66     52 if (defined $2 and $2 eq '|') {
806 1 50       6 if (!$1) {
    50          
807 0         0 $ns = '' # [|a] matches attributes in no namespace;
808             }
809             elsif ($1 eq '*') {
810 0         0 $ns = undef; # undef means all, '' means default
811             }
812             else {
813 1         4 $ns = $sac->[_ns_map_]->{$1};
814             }
815             }
816             else {
817 8         11 $ns = ''; # [a] is equivalent to [|a]
818             }
819            
820             # if there's more, parse on
821 9         9 my ($op,$value);
822 9 100       20 if (length $attr) {
823 6 50       62 if ($attr =~ s/^((?:\^|\$|\*|\~|\|)?=)//) {
824 6         11 $op = $1;
825 6         24 $attr =~ s/^(?:'|")//; #"
826 6         18 $attr =~ s/(?:'|")$//; #"
827 6         10 $value = $attr;
828             }
829             else {
830 0         0 $sac->[_eh_]->warning('Unknown token in attribute condition');
831 0 0       0 if ($$css =~ s/[^;]*;//) {
832 0         0 $sac->[_eh_]->warning('Unknown token in import rule');
833             }
834             else {
835 0         0 $sac->[_eh_]->fatal_error('Unknown token in import rule');
836             }
837             }
838             }
839            
840             # create the right condition
841 9         14 my $acond;
842 9 100 100     56 if (!$op or $op eq '=') {
    100          
    100          
    50          
    100          
    50          
843 5 100       13 my $spec = (defined $value)?1:0;
844 5         32 $acond = $sac->[_cf_]->create_attribute_condition($lname,$ns,$spec,$value);
845             }
846             elsif ($op eq '^=') {
847 1         6 $acond = $sac->[_cf_]->create_starts_with_attribute_condition($lname,$ns,1,$value);
848             }
849             elsif ($op eq '$=') {
850 1         6 $acond = $sac->[_cf_]->create_ends_with_attribute_condition($lname,$ns,1,$value);
851             }
852             elsif ($op eq '*=') {
853 0         0 $acond = $sac->[_cf_]->create_contains_attribute_condition($lname,$ns,1,$value);
854             }
855             elsif ($op eq '~=') {
856 1         7 $acond = $sac->[_cf_]->create_one_of_attribute_condition($lname,$ns,1,$value);
857             }
858             elsif ($op eq '|=') {
859 1         13 $acond = $sac->[_cf_]->create_begin_hyphen_attribute_condition($lname,$ns,1,$value);
860             }
861            
862 9         43 push @tokens, $acond;
863             }
864            
865             # :pseudo()
866             elsif (
867             ($args,$$css,$func) = Text::Balanced::extract_bracketed($$css,q/()'"/,qr/:$RE_IDENT/)
868             and
869             length $func
870             ) {
871            
872             # cleanup the func and args
873 10         2836 $func =~ s/^://;
874 10         35 $args =~ s/^\(\s*//;
875 10         44 $args =~ s/\s*\)$//;
876 10         20 $args =~ s/^(?:'|")//; #"
877 10         34 $args =~ s/(?:'|")$//; #"
878            
879             # lang(lang_tag)
880 10 100       72 if (lc ($func) eq 'lang') {
    100          
    100          
    50          
881 1         9 push @tokens, $sac->[_cf_]->create_lang_condition($args);
882             }
883            
884             # contains("text")
885             elsif (lc ($func) eq 'contains') {
886 1         8 push @tokens, $sac->[_cf_]->create_content_condition($args);
887             }
888            
889             # not(selector)
890             elsif (lc ($func) eq 'not') {
891 2         118 my $sel = $sac->parse_simple_selector(\$args);
892             # push @tokens, $sac->[_sf_]->create_negative_selector($sel);
893 2         11 push @tokens, $sac->[_cf_]->create_negative_condition($sel);
894             }
895            
896             # positional: nth-child, nth-last-child, nth-of-type, nth-last-of-type,
897             elsif ($func =~ m/^nth-(last-)?((?:child)|(?:of-type))$/i) {
898 6         11 my $pos = $args;
899 6 100       18 my $of_type = (lc($2) eq 'of-type')?1:0;
900 6 100       25 $pos = (lc($1) eq 'last-')?("-$pos"):($pos);
901             # PositionalCondition will take care of parsing
902             # the expressions, and will provide appropriate accessors
903 6         28 push @tokens, $sac->[_cf_]->create_positional_condition($pos,$of_type,0);
904             }
905            
906             # something else we don't know about
907             else {
908 0         0 push @tokens, $sac->[_cf_]->create_pseudo_class_condition(undef,$func);
909             }
910             }
911            
912             # :pseudo (not a function)
913             elsif ($$css =~ s/^\:($RE_IDENT)//) {
914            
915             # root
916 11 100 33     2725 if (lc($1) eq 'root') {
    100 33        
    100 33        
    100 33        
    100          
    100          
    100          
    100          
    50          
917 2         13 push @tokens, $sac->[_cf_]->create_is_root_condition;
918             }
919            
920             # empty
921             elsif (lc($1) eq 'empty') {
922 1         7 push @tokens, $sac->[_cf_]->create_is_empty_condition;
923             }
924            
925             # only-child
926             elsif (lc($1) eq 'only-child') {
927 1         5 my $fcond = $sac->[_cf_]->create_positional_condition(1,0,0);
928 1         6 my $lcond = $sac->[_cf_]->create_positional_condition(-1,0,0);
929 1         6 my $ocond = $sac->[_cf_]->create_and_condition($fcond,$lcond);
930 1         6 push @tokens, $ocond;
931             }
932            
933             # only-of-type
934             elsif (lc($1) eq 'only-of-type') {
935 1         5 my $fcond = $sac->[_cf_]->create_positional_condition(1,1,0);
936 1         6 my $lcond = $sac->[_cf_]->create_positional_condition(-1,1,0);
937 1         5 my $ocond = $sac->[_cf_]->create_and_condition($fcond,$lcond);
938 1         6 push @tokens, $ocond;
939             }
940            
941             # first-child
942             elsif (lc($1) eq 'first-child') {
943 1         6 push @tokens, $sac->[_cf_]->create_positional_condition(1,0,0);
944             }
945            
946             # last-child
947             elsif (lc($1) eq 'last-child') {
948 1         6 push @tokens, $sac->[_cf_]->create_positional_condition(-1,0,0);
949             }
950            
951             # first-of-type
952             elsif (lc($1) eq 'first-of-type') {
953 1         6 push @tokens, $sac->[_cf_]->create_positional_condition(1,1,0);
954             }
955            
956             # last-of-type
957             elsif (lc($1) eq 'last-of-type') {
958 1         7 push @tokens, $sac->[_cf_]->create_positional_condition(-1,1,0);
959             }
960            
961             # pseudo-elements in disguise
962             elsif (
963             lc($1) eq 'first-line' or lc($1) eq 'first-letter' or
964             lc($1) eq 'selection' or lc($1) eq 'before' or lc($1) eq 'after'
965             ) {
966 0         0 push @tokens, $sac->[_sf_]->create_pseudo_element_selector(undef,lc($1));
967             }
968            
969             # regular: link, visited, hover, active, focus, target, enabled, disabled,
970             # checked, indeterminate
971             else {
972 2         12 push @tokens, $sac->[_cf_]->create_pseudo_class_condition(undef,$1);
973             }
974             }
975            
976             # combinators
977             elsif ($$css =~ s/^\s*((?:\+|>|~))\s*//) {
978 3         572 push @tokens, $1;
979             }
980            
981             # special case empty combinator
982             elsif ($$css =~ s/^\s+//) {
983 1         218 push @tokens, ' ';
984             }
985            
986             # an error
987             else {
988 0 0       0 if (s/^.*?(,|{)/$1/) {
989 0         0 $sac->[_eh_]->warning('Unknown token in simple selector');
990             }
991             else {
992 0         0 $sac->[_eh_]->fatal_error('Unknown token in simple selector');
993             }
994             }
995             }
996            
997             ### process the tokens list
998            
999             # if the first token isn't an element selector then create a *|* one
1000             # evaling is lame, but it's the only test I could think of
1001 46         62 eval { $tokens[0]->SelectorType };
  46         430  
1002 46 100       121 if ($@) {
1003 29         108 unshift @tokens, $sac->[_sf_]->create_element_selector(undef,undef);
1004             }
1005            
1006             # start looping over the tokens to reduce the list
1007 46         64 my $selector = shift @tokens;
1008 46         56 eval { $selector->SelectorType };
  46         119  
1009 46 50       97 if ($@) {
1010             # this is a serious exception
1011 0         0 $sac->[_eh_]->fatal_error('Really weird input in simple selector');
1012             }
1013            
1014             # here we need to check whether the next token is also a selector
1015             # if it is, we need to make an AND_CONDITION containing the two selectors
1016             # and to attach it to a universal selector
1017             # then we'll have to mix it into the $cond below.
1018 46 100       88 if (@tokens) {
1019 38         52 eval { $tokens[0]->SelectorType };
  38         300  
1020 38 50       110 if (!$@) {
1021 0         0 my $and_cond = $sac->[_cf_]->create_and_condition($selector,shift @tokens);
1022 0         0 $selector = $sac->[_sf_]->create_element_selector(undef,undef);
1023             }
1024             }
1025            
1026            
1027             # create a conditional selector with all conditions
1028 46         116 my $cond = $sac->build_condition(\@tokens);
1029 46 100       103 if ($cond) {
1030 34         130 $selector = $sac->[_sf_]->create_conditional_selector($selector,$cond);
1031             }
1032            
1033 46         124 while (@tokens) {
1034            
1035             # here there should be a combinator or nothing
1036 4         8 my $comb = shift @tokens;
1037 4 50       11 if ($comb) {
1038             # pretty serious error
1039 4 50       9 $sac->[_eh_]->fatal_error('Really weird input in simple selector') if ref $comb;
1040            
1041             # get the next selector
1042 4         6 my $new_selector = shift @tokens;
1043 4         6 eval { $new_selector->SelectorType };
  4         13  
1044 4 50       10 if ($@) {
1045             # last unless length $new_selector;
1046 0 0       0 if (ref $new_selector) {
1047 0         0 unshift @tokens, $new_selector;
1048 0         0 $new_selector = $sac->[_sf_]->create_element_selector(undef,undef);
1049             }
1050             else {
1051             # this is a serious exception (we don't know what's here)
1052 0         0 $sac->[_eh_]->fatal_error('Really weird input in simple selector: "' . $$css . '"');
1053             }
1054             }
1055            
1056             # create a conditional selector with all conditions
1057 4         10 my $cond = $sac->build_condition(\@tokens);
1058 4 50       9 if ($cond) {
1059 0         0 $new_selector = $sac->[_sf_]->create_conditional_selector($new_selector,$cond);
1060             }
1061            
1062             # various possible combinators
1063 4 100       21 if ($comb eq ' ') {
    100          
    100          
    50          
1064 1         7 $selector = $sac->[_sf_]->create_descendant_selector($selector,$new_selector);
1065             }
1066             elsif ($comb eq '>') {
1067 1         7 $selector = $sac->[_sf_]->create_child_selector($selector,$new_selector);
1068             }
1069             elsif ($comb eq '+') {
1070 1         7 $selector = $sac->[_sf_]->create_direct_adjacent_selector(
1071             ELEMENT_NODE,
1072             $selector,
1073             $new_selector
1074             );
1075             }
1076             elsif ($comb eq '~') {
1077 1         7 $selector = $sac->[_sf_]->create_indirect_adjacent_selector(
1078             ELEMENT_NODE,
1079             $selector,
1080             $new_selector
1081             );
1082             }
1083             }
1084             }
1085            
1086 46         141 return $selector;
1087             }
1088             #---------------------------------------------------------------------#
1089            
1090            
1091             #---------------------------------------------------------------------#
1092             # $sac->build_condition(\@tokens)
1093             # helper to build conditions
1094             #---------------------------------------------------------------------#
1095             sub build_condition {
1096 50     50 1 59 my $sac = shift;
1097 50         55 my $tokens = shift;
1098            
1099             # get all conditions
1100 50         53 my @conditions;
1101 50         108 while (@$tokens) {
1102             # eval { $tokens->[0]->SelectorType };
1103             # if (not $@) {
1104             # $sac->[_eh_]->fatal_error('Really weird input in simple selector');
1105             # }
1106 38 100       96 last if ! ref $tokens->[0];
1107 34         93 push @conditions, shift @$tokens;
1108             }
1109            
1110             # build a single condition out of the others
1111 50         46 my $cond;
1112 50 100       103 if (@conditions) {
1113 34         41 $cond = shift @conditions;
1114 34         69 for my $c (@conditions) {
1115 0         0 $cond = $sac->[_cf_]->create_and_condition($cond,$c);
1116             }
1117             }
1118            
1119 50         106 return $cond;
1120             }
1121             #---------------------------------------------------------------------#
1122            
1123            
1124             #---------------------------------------------------------------------#
1125             # $sac->parse_rule($string_ref)
1126             # parses a rule (with { and })
1127             #---------------------------------------------------------------------#
1128             sub parse_rule {
1129 44     44 1 61 my $sac = shift;
1130 44         52 my $css = shift;
1131 44 50       111 return unless defined $$css;
1132            
1133             # remove { and }, and parse the content
1134 44         220 $$css =~ s/^\s*{//;
1135 44         170 $$css =~ s/}\s*$//;
1136 44         46 warn "[SAC] removed curlies\n" if DEBUG;
1137 44         107 $sac->parse_style_declaration($css);
1138             }
1139             #---------------------------------------------------------------------#
1140             *CSS::SAC::parseRule = \&parse_rule;
1141            
1142            
1143             #---------------------------------------------------------------------#
1144             # $sac->parse_style_declaration($string_ref)
1145             # same as parse_rule, but without the { and }. Cool for HTML, SVG...
1146             #---------------------------------------------------------------------#
1147             sub parse_style_declaration {
1148 44     44 1 56 my $sac = shift;
1149 44         53 my $css = shift;
1150            
1151             # take those prop-val one by one
1152 44         94 $sac->parse_comments($css);
1153 44         121 $$css =~ s/^\s*//;
1154 44         184 while (length $$css) {
1155             # the property
1156 0         0 $$css =~ s/^(-?$RE_IDENT)\s*//; # includes the - prefix
1157 0         0 my $prop = $1;
1158 0         0 $sac->parse_comments($css);
1159            
1160             # the separator
1161 0         0 $$css =~ s/^\s*:\s*//;
1162 0         0 $sac->parse_comments($css);
1163            
1164             # the value
1165 0         0 my $lu = $sac->parse_property_value($css);
1166 0 0       0 if (!@$lu) {
1167 0 0       0 last unless length $$css;
1168 0 0       0 if ($$css =~ s/[^;}]*(?:;|\})?//) { # this is a bit dodgy...
1169 0         0 $sac->[_eh_]->warning('Unknown token in style declaration');
1170             }
1171             else {
1172 0         0 $sac->[_eh_]->fatal_error('Unknown token in style declaration: "' . $$css . '"');
1173             }
1174 0         0 next;
1175             }
1176 0         0 $sac->parse_comments($css);
1177            
1178             # the priority
1179 0         0 my $prio = $sac->parse_priority($css);
1180 0         0 $sac->parse_comments($css);
1181            
1182             # the terminator
1183 0         0 $$css =~ s/^\s*;\s*//;
1184            
1185             # callback
1186 0   0     0 $prio ||= 0;
1187 0 0       0 $sac->[_dh_]->property($prop,$lu,$prio) if $sac->[_dh_can_]->{property};
1188            
1189             # remove cruft
1190 0         0 $sac->parse_comments($css);
1191 0         0 $$css =~ s/^\s*//;
1192             }
1193             }
1194             #---------------------------------------------------------------------#
1195             *CSS::SAC::parseStyleDeclaration = \&parse_style_declaration;
1196            
1197            
1198             #---------------------------------------------------------------------#
1199             # $sac->parse_property_value($string_ref)
1200             # parses a value
1201             # returns an array ref of lexical units
1202             #---------------------------------------------------------------------#
1203             sub parse_property_value {
1204 0     0 1 0 my $sac = shift;
1205 0         0 my $css = shift;
1206 0   0     0 my $att = shift || 0;
1207            
1208 0         0 $$css =~ s/^\s*//;
1209            
1210             # parse it by value chunks
1211 0         0 my @lus;
1212 0         0 while (1) {
1213 0         0 my ($type,$text,$value);
1214            
1215 0         0 $sac->parse_comments($css);
1216            
1217             # exit conditions
1218 0 0 0     0 if (! length($$css) or $$css =~ m/^\s*(?:;|!)/ or ($att and $$css =~ s/^\s*(?:\))//)) {
    0 0        
    0 0        
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
1219 0         0 last;
1220             }
1221            
1222             # ops
1223             elsif ($$css =~ s{^\s*(,|/)\s*}{}) {
1224 0         0 $value = $1;
1225 0 0       0 if ($value eq ',') {
1226 0         0 $type = OPERATOR_COMMA;
1227 0         0 $text = 'comma';
1228             }
1229             else {
1230 0         0 $type = OPERATOR_SLASH;
1231 0         0 $text = 'slash';
1232             }
1233             }
1234            
1235             # special case empty op
1236             elsif ($$css =~ s{^\s+}{}) {
1237 0         0 next;
1238             }
1239            
1240             # inherit
1241             elsif ($$css =~ s/^inherit//) {
1242 0         0 $type = INHERIT;
1243 0         0 $text = 'inherit';
1244 0         0 $value = undef;
1245             }
1246            
1247             # lengths and assoc
1248             elsif ($$css =~ s/^((?:\+|-)?$RE_NUM)
1249             (em|ex|px|cm|mm|in|pt|pc|deg|rad|grad|ms|s|hz|khz|%)
1250             //xi) {
1251 0         0 $value = $1;
1252 0         0 $text = lc $2;
1253 0         0 $type = $DIM_MAP{$text};
1254             }
1255            
1256             # dimension
1257             elsif ($$css =~ s/^((?:\+|-)?$RE_NUM)($RE_IDENT)//) {
1258 0         0 $value = $1;
1259 0         0 $text = lc $2;
1260 0         0 $type = DIMENSION;
1261             }
1262            
1263             # number
1264             elsif ($$css =~ s/^((?:\+|-)?$RE_NUM)//) {
1265 0         0 $value = $1;
1266 0         0 $text = 'number';
1267 0 0       0 if ($value =~ m/\./) {
1268 0         0 $type = REAL;
1269             }
1270             else {
1271 0         0 $type = INTEGER;
1272             }
1273             }
1274            
1275             # unicode range
1276             elsif ($$css =~ s/^($RE_RANGE)//) {
1277 0         0 $value = $1;
1278 0         0 $text = 'unicode-range';
1279 0         0 $type = UNICODERANGE;
1280             }
1281            
1282             # hex rgb
1283             elsif ($$css =~ s/^#([0-9a-fA-F]{6}|[0-9a-fA-F]{3})//) {
1284 0         0 $value = $1;
1285 0         0 $text = '#';
1286 0         0 $type = RGBCOLOR;
1287             }
1288            
1289             # functions
1290             # elsif (
1291             # ($value,$$css,$text) = Text::Balanced::extract_bracketed($$css,q/()'"/,qr/$RE_IDENT/)
1292             # and
1293             # length $text
1294             # ) {
1295             elsif ($$css =~ s/^($RE_IDENT)\(//) {
1296            
1297             # cleanup the func and args
1298             # $text = lc $text;
1299             # $value =~ s/^\(\s*//;
1300             # $value =~ s/\s*\)$//;
1301             # $value =~ s/^(?:"|')//; #"
1302             # $value =~ s/(?:"|')$//; #"
1303 0         0 $text = lc $1;
1304 0         0 $value = $sac->parse_property_value($css, 1);
1305            
1306             # get the appropriate type
1307 0 0       0 if ($FUNC_MAP{$text}) {
1308 0         0 $type = $FUNC_MAP{$text};
1309             }
1310             else {
1311 0         0 $type = FUNCTION;
1312             }
1313             }
1314            
1315             # ident
1316             elsif ($$css =~ s/^($RE_IDENT)//) {
1317 0         0 $value = $1;
1318 0         0 $text = 'ident';
1319 0         0 $type = IDENT;
1320             }
1321            
1322             # string
1323             elsif ($$css =~ s/^($RE_STRING)//) {
1324 0         0 $value = $1;
1325 0         0 $value =~ s/^(?:"|')//; #"
1326 0         0 $value =~ s/(?:"|')$//; #"
1327 0         0 $text = 'string';
1328 0         0 $type = STRING_VALUE;
1329             }
1330            
1331             # error
1332             else {
1333 0         0 return [];
1334             }
1335            
1336             # add a lu
1337 0         0 push @lus, CSS::SAC::LexicalUnit->new($type,$text,$value);
1338             }
1339            
1340 0         0 return \@lus;
1341             }
1342             #---------------------------------------------------------------------#
1343             *CSS::SAC::parsePropertyValue = \&parse_property_value;
1344            
1345            
1346             #---------------------------------------------------------------------#
1347             # $sac->parse_priority($string_ref)
1348             # parses a priority
1349             # returns true if there is a priority value there
1350             #---------------------------------------------------------------------#
1351             sub parse_priority {
1352 0     0 1 0 my $sac = shift;
1353 0         0 my $css = shift;
1354            
1355 0 0       0 return 1 if $$css =~ s/^\s*!\s*important//i;
1356             }
1357             #---------------------------------------------------------------------#
1358             *CSS::SAC::parsePriority = \&parse_priority;
1359            
1360            
1361             # #
1362             # #
1363             ### Parsing Methods ###################################################
1364            
1365            
1366            
1367             ### Default Error Handler #############################################
1368             # #
1369             # #
1370            
1371             # This is pretty much a non package, it is just there to provide the
1372             # default error handler.
1373            
1374             package CSS::SAC::DefaultErrorHandler;
1375            
1376 2     2   21 sub new { return bless [], __PACKAGE__; }
1377 0     0     sub warning { warn "[warning] $_[1] (line " . (caller)[2] . ")"; }
1378 0     0     sub error { warn "[error] $_[1] (line " . (caller)[2] . ")"; }
1379 0     0     sub fatal_error { die "[fatal] $_[1] (line " . (caller)[2] . ")"; }
1380            
1381            
1382             # #
1383             # #
1384             ### Default Error Handler #############################################
1385            
1386            
1387            
1388             1;
1389            
1390             =pod
1391            
1392             =head1 NAME
1393            
1394             CSS::SAC - SAC CSS parser
1395            
1396             =head1 SYNOPSIS
1397            
1398             use CSS::SAC qw();
1399             use My::SACHandler ();
1400             use My::SACErrors ();
1401            
1402             my $doc_handler = My::SACHandler->new;
1403             my $err_handler = My::SACErrors->new;
1404             my $sac = CSS::SAC->new({
1405             DocumentHandler => $doc_handler,
1406             ErrorHandler => $err_handler,
1407             });
1408            
1409             # generate a stream of events
1410             $sac->parse({ filename => 'foo.css' });
1411            
1412             =head1 DESCRIPTION
1413            
1414             SAC (Simple API for CSS) is an event-based API much like SAX for XML.
1415             If you are familiar with the latter, you should have little trouble
1416             getting used to SAC. More information on SAC can be found online at
1417             http://www.w3.org/TR/SAC.
1418            
1419             CSS having more constructs than XML, core SAC is still more complex
1420             than core SAX. However, if you need to parse a CSS style sheet, SAC
1421             probably remains the easiest way to get it done.
1422            
1423             Most of the spec is presently implemented. The following interfaces
1424             are not yet there: Locator, CSSException, CSSParseException,
1425             ParserFactory. They may or may not be implemented at a later date
1426             (the most likely candidates are the exception classes, for which I
1427             still have to find an appropriate model).
1428            
1429             Some places differ slightly from what is in the spec. I have tried to
1430             keep those to a justified minimum and to flag them correctly.
1431            
1432             =head2 the CSS::SAC module itself
1433            
1434             The Parser class doesn't exist separately, it's defined in CSS::SAC.
1435             It doesn't expose the locale interface because we don't localize
1436             errors (yet). It also doesn't have C<parseStyleSheet> but rather
1437             C<parse>, which is more consistent with other Perl parsing interfaces.
1438            
1439             I have added the C<charset> callback to the DocumentHandler
1440             interface. There are valid reasons why it wasn't there (it can be
1441             trusted only ever so often, and one should look at the actual encoding
1442             instead) but given that it's a token in the grammar, I believe that
1443             there should still be a way to access it.
1444            
1445             =head1 METHODS
1446            
1447             =over 4
1448            
1449             =item * CSS::SAC->new(\%options) or $sac->new(\%options)
1450            
1451             Constructs a new parser object. The options can be:
1452            
1453             - ConditionFactory and SelectorFactory
1454             the factory classes used to build selector and condition objects.
1455             See CSS::SAC::{Condition,Selector}Factory for more details on the
1456             interfaces those classes must expose.
1457            
1458             - DocumentHandler and ErrorHandler
1459             the handler classes used as sinks for the event stream received
1460             from a SAC Driver. See CSS::SAC::{Document,Error}Factory for more
1461             details on the interfaces those classes must expose.
1462            
1463             Methods will be called on whatever it is you pass as values to those
1464             options. Thus, you may pass in objects as well as class names (I
1465             haven't tested this yet, there may be a problem).
1466            
1467             NOTE: an error handler should implement all callbacks, while a document
1468             handler may only implement those it is interested in. There is a default
1469             error handler (which dies and warns depending on the type of error) but
1470             no default document handler.
1471            
1472             =item * $sac->ParserVersion or $sac->getParserVersion
1473            
1474             Returns the supported CSS version.
1475            
1476             Requesting this parser's ParserVersion will return the string 'CSS3'.
1477             While that is (modulo potential bugs of course) believed to be
1478             generally true, several caveats apply:
1479            
1480             To begin with, CSS3 has been modularised, and various modules are at
1481             different stages of development. Evolving modules may require evolving
1482             this parser. I hesitated between making ParserVersion return CSS2,
1483             CSS3-pre, or simply CSS3. I chose the latter because I intend to
1484             update it as I become aware of the necessity of changes to accommodate
1485             new CSS3 stuff, and because it already supports a number of constructs
1486             alien to CSS2 (of which namespaces is imho important enough to justify
1487             a CSS3 tag). If you are aware of incompatibilities, please contact me.
1488            
1489             More importantly, it is now considered wrong for a parser to return
1490             CSSx as its version and instead it is expected to return an uri
1491             corresponding to the uri of the CSS version that it supports. However,
1492             there is no uri for CSS3, but instead one uri per module. While this
1493             issue hasn't been resolved by the WG, I will stick to returning CSS3.
1494             However, B<the returned value is likely to change> in
1495             the future, so please avoid relying on it.
1496            
1497             =item * $cf = $sac->ConditionFactory
1498            
1499             =item * $sac->ConditionFactory($cf) or $sac->setConditionFactory($cf)
1500            
1501             =item * $sf = $sac->SelectorFactory
1502            
1503             =item * $sac->SelectorFactory($sf) or $sac->setSelectorFactory($sf)
1504            
1505             =item * $dh = $sac->DocumentHandler
1506            
1507             =item * $sac->DocumentHandler($dh) or $sac->setDocumentHandler($dh)
1508            
1509             =item * $eh = $sac->ErrorHandler
1510            
1511             =item * $sac->ErrorHandler($eh) or $sac->setErrorHandler($eh)
1512            
1513             get/set the ConditionFactory, SelectorFactory, DocumentHandler,
1514             ErrorHandler that we use
1515            
1516             =item * $sac->parse(\%options)
1517            
1518             =item * $sac->parseStyleSheet(\%options)
1519            
1520             parses a style sheet and sends events to the defined handlers. The
1521             options that you can use are:
1522            
1523             =over 8
1524            
1525             =item * string
1526            
1527             =item * ioref
1528            
1529             =item * filename
1530            
1531             passes either a string, an open filehandle, or a filename to read the
1532             stylesheet from
1533            
1534             =item * embedded
1535            
1536             tells whether the stylesheet is embedded or not. This is most of the
1537             time useless but it will influence the interpretation of @charset
1538             rules. The latter being forbidden in embedded style sheets they will
1539             generate an ignorable_style_sheet event instead of a charset event if
1540             embedded is set to a true value.
1541            
1542             =back
1543            
1544             =item * $sac->parse_rule($string_ref)
1545            
1546             =item * $sac->parseRule($string_ref)
1547            
1548             parses a rule (with { and }). You probably don't need this one. It
1549             returns nothing, but generates the events.
1550            
1551             =item * $sac->parse_style_declaration($string_ref)
1552            
1553             =item * $sac->parseStyleDeclaration($string_ref)
1554            
1555             same as parse_rule, but without the { and }. This is useful when you
1556             want to parse style declarations embedded using style attributes in
1557             HTML, SVG, etc... It returns nothing, but generates the events.
1558            
1559             =item * $sac->parse_property_value($string_ref)
1560            
1561             =item * $sac->parsePropertyValue($string_ref)
1562            
1563             parses a property value and returns an array ref of lexical units
1564             (see CSS::SAC::LexicalUnit)
1565            
1566             =item * $sac->parse_priority($string_ref)
1567            
1568             =item * $sac->parsePriority($string_ref)
1569            
1570             parses a priority and returns true if there is a priority value there.
1571            
1572             =item * $sac->parse_selector_list($string_ref)
1573            
1574             =item * $sac->parseSelectors($string_ref)
1575            
1576             parses a list of selectors and returns an array ref of selectors
1577            
1578             =back
1579            
1580             =head1 OTHER METHODS
1581            
1582             Methods in this section are of relevance mostly to the internal
1583             workings of the parser. I document them here but I don't really
1584             consider them part of the interface, and thus may change them if need
1585             be. If you are using them directly tell me about it and I will
1586             "officialize" them. These have no Java style equivalent.
1587            
1588             =over 4
1589            
1590             =item * $sac->parse_charset($string_ref)
1591            
1592             parses a charset. It returns nothing, but generates the events.
1593            
1594             =item * $sac->parse_imports($string_ref)
1595            
1596             parses import rules. It returns nothing, but generates the events.
1597            
1598             =item * $sac->parse_namespace_declarations($string_ref)
1599            
1600             parses ns declarations. It returns nothing, but generates the events.
1601            
1602             =item * $sac->parse_medialist($string_ref)
1603            
1604             parses a list of media values and returns that list as an arrayref
1605            
1606             =item * $sac->parse_comments($string_ref)
1607            
1608             parses as many comments as there are at the beginning of the string.
1609             It returns nothing, but generates the events.
1610            
1611             =item * $sac->parse_simple_selector($string_ref)
1612            
1613             parses a simple selector and returns the selector object
1614            
1615             =item * $sac->build_condition(\@tokens)
1616            
1617             helper to build conditions (you probably don't want to use this at
1618             all...)
1619            
1620             =back
1621            
1622             =head1 CSS::SAC::DefaultErrorHandler
1623            
1624             This is pretty much a non-package, it is just there to provide the
1625             default error handler if you are too lazy to provide one yourself.
1626            
1627             All it does is pretty simple. There are three error levels:
1628             C<warning>, C<error>, and C<fatal_error>. What it does is warn on the
1629             first two and die on the last. Yes, it ain't fancy but then you can
1630             plug anything more intelligent into it at any moment.
1631            
1632            
1633             =head1 CSS3 ISSUES
1634            
1635             One problem is that I have modelled this parser after existing SAC
1636             implementations that do not take into account as much of CSS3 as is
1637             possible. Some parts of that are trivial, and I have provided
1638             support on my own in this module. Other parts though are more
1639             important and I believe that coordination between the SAC authors
1640             would be beneficial on these points (once the relevant CSS3 modules
1641             have moved to REC).
1642            
1643             =over 4
1644            
1645             =item * new attribute conditions
1646            
1647             CSS3-selectors introduces a bunch of new things, including new
1648             attribute conditions ^= (starts with), $= (ends with) and *=
1649             (contains). There are no corresponding constants for conditions, so I
1650             suggested SAC_STARTS_WITH_ATTRIBUTE_CONDITION,
1651             SAC_ENDS_WITH_ATTRIBUTE_CONDITION, SAC_CONTAINS_ATTRIBUTE_CONDITION.
1652            
1653             Note that these constants have been added, together with the
1654             corresponding factory methods. However, they will remain undocumented
1655             and considered experimental until some consensus is reached on the
1656             matter.
1657            
1658             =item * :root condition
1659            
1660             The :root token confuses some people because they think it is
1661             equivalent to XPath's / root step. That is not so. XPath's root
1662             selects "above" the document element. CSS's :root tests whether an
1663             element is the document element, there is nothing above a document
1664             element. Thus :root on its own is equivalent to *:root. It's a
1665             condition, not a selector. E:root matches the E element that is also
1666             the document element (if there is one).
1667            
1668             Thus, SAC_ROOT_NODE_SELECTOR does not apply and we need a new
1669             SAC_IS_ROOT_CONDITION constant.
1670            
1671             Note that this constant has been added, together with the
1672             corresponding factory method. However, it will remain undocumented
1673             and considered experimental until some consensus is reached on the
1674             matter.
1675            
1676             =item * other new pseudo-classes
1677            
1678             :empty definitely needs a constant too I'd say.
1679            
1680             Note that this constant has been added, together with the
1681             corresponding factory method. However, it will remain undocumented
1682             and considered experimental until some consensus is reached on the
1683             matter.
1684            
1685             =item * an+b syntax in positional conditions
1686            
1687             There is new syntax that allows for very customisable positional
1688             selecting. PositionalCondition needs to be updated to deal with that.
1689            
1690             =back
1691            
1692             =head1 BUGS
1693            
1694             - the problem with attaching pseudo-elements to elements as
1695             coselectors. I'm not sure which is the right representation. Don't
1696             forget to update CSS::SAC::Writer too so that it writes it out
1697             properly.
1698            
1699             - see Bjoern's list
1700            
1701             =head1 ACKNOWLEDGEMENTS
1702            
1703             - Bjoern Hoehrmann for his immediate reaction and much valuable
1704             feedback and suggestions. It's certainly much harder to type with all
1705             those fingers that all those Mafia padres have cut off, but at least
1706             I get work done much faster than before. And also those nasty bugs he
1707             kindly uncovered.
1708            
1709             - Steffen Goeldner for spotting bugs and providing patches.
1710            
1711             - Ian Hickson for very very very kind testing support, and all sorts
1712             of niceties.
1713            
1714             - Manos Batsis for starting a very long discussion on this that
1715             eventually deviated into other very interesting topics, and for
1716             giving me some really weird style sheets to feed into this module.
1717            
1718             - Simon St.Laurent for posting this on xmlhack.com and thus pointing a
1719             lot of people to this module (as seen in my referer logs).
1720            
1721             And of course all the other people that have sent encouragement notes
1722             and feature requests.
1723            
1724             =head1 TODO
1725            
1726             - add a pointer to the SAC W3 page
1727            
1728             - create the Exception classes
1729            
1730             - update PositionalCondition to include logic that can normalize the
1731             an+b notation and add a method that given a position will return a
1732             boolean indicating whether it matches the condition.
1733            
1734             - add stringify overloading to all classes so that they may be
1735             printed directly
1736            
1737             - have parser version return an overloaded object that circumvents the
1738             current problems
1739            
1740             - add docs on how to write a {Document,Error}Handler, right now there
1741             is example code in Writer, but it isn't all clearly explained.
1742            
1743             - find a way to make the '-' prefix to properties optional
1744            
1745             - add a filter that switches events to spec names, and that can be used
1746             directly through an option
1747            
1748             - add DOM-like hasFeature support (in view of SAC 3)
1749            
1750             - prefix all constants with SAC_. Keep the old ones around for a few
1751             versions, importable with :old-constants.
1752            
1753             - update docs
1754            
1755             =head1 AUTHOR
1756            
1757             Robin Berjon
1758            
1759             This module is licensed under the same terms as Perl itself.
1760            
1761             =cut