|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # -*- cperl -*-  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # copyright (C) 2005 Topia . all rights reserved.  | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # This is free software; you can redistribute it and/or modify it  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #   under the same terms as Perl itself.  | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # $Id: Util.pm 104 2005-02-05 08:26:34Z topia $  | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # $URL: file:///usr/minetools/svnroot/mixi/trunk/WWW-Mixi-OO/lib/WWW/Mixi/OO/Util.pm $  | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package WWW::Mixi::OO::Util;  | 
| 
8
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
20
 | 
 use strict;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
182
 | 
    | 
| 
9
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
14
 | 
 use warnings;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
147
 | 
    | 
| 
10
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
20
 | 
 use URI;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
64
 | 
    | 
| 
11
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
18
 | 
 use Carp;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
250
 | 
    | 
| 
12
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
8098
 | 
 use HTML::Parser;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23660
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
131
 | 
    | 
| 
13
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
4062
 | 
 use Hash::Case::Preserve;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
528253
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5251
 | 
    | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $regex_parts;  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __PACKAGE__->_init_regex_parts;  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 WWW::Mixi::OO::Util - WWW::Mixi::OO's Helper Functions  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   use base qw(WWW::Mixi::OO::Util);  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $this->absolute_uri(..., ...);  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 misc helper functions.  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 METHODS  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item absolute_uri  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $util->absolute_uri($uri, [$base]);  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Generate absolute URI from base uri.  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is simple wrapper for URI class.  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub absolute_uri {  | 
| 
46
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
  
1
  
 | 
7
 | 
     my ($this, $uri, $base) = @_;  | 
| 
47
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     return do {  | 
| 
48
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
10
 | 
 	if (defined $base) {  | 
| 
49
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
 	    URI->new_abs($uri, $base)  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	} else {  | 
| 
51
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    URI->new($uri);  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     };  | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item relative_uri  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $util->relative_uri($uri, [$base]);  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Generate relative URI from base uri.  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is simple wrapper for URI class.  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub relative_uri {  | 
| 
66
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my ($this, $uri, $base) = @_;  | 
| 
67
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return  $this->absolute_uri($uri, $base)->rel($base);  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item remove_tag  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $util->remove_tag($str);  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Remove HTML(or XML, or SGML?) tag from string.  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub remove_tag {  | 
| 
79
 | 
15
 | 
 
 | 
 
 | 
  
15
  
 | 
  
1
  
 | 
1123
 | 
     my ($this, $str) = @_;  | 
| 
80
 | 
15
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
43
 | 
     return undef unless defined $str;  | 
| 
81
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
     my $non_metas = $this->regex_parts->{non_metas};  | 
| 
82
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
196
 | 
     my $re_standard_tag = qr/  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       <$non_metas  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       (?:"[^\"]*"$non_metas|'[^\']*'$non_metas)*  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       (?:>|(?=<)|$(?!\n))  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     /x;  | 
| 
87
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
49
 | 
     my $re_comment_tag  = qr/
 | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       (?:  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        --[^-]*-  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        (?:[^-]+-)*?-  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        (?:[^>-]*(?:-[^>-]+)*?)??  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       )*  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       (?:>|$(?!\n)|--.*$)/x;  | 
| 
94
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
161
 | 
     my $re_html_tag     = qr/$re_comment_tag|$re_standard_tag/;  | 
| 
95
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
144
 | 
     $str =~ s/$re_html_tag//g;  | 
| 
96
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
93
 | 
     return $str;  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item extract_balanced_html_parts  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $util->extract_balanced_html_parts(  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ignore_outside => 1,  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       element => 'table',  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       text => ...);  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 extract _balanced_ HTML parts from text.  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 options:  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item element  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 element name for balanced check.  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item text  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 text to extract.  | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item ignore_outside  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ignore Ith outside element.  | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 example:  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $util->extract_balanced_html_parts(  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ignore_outside => 1,  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       element => 'table',  | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       text => '');  | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # returns:  | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # ('', '')  | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item exclude_border_element  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 exclude border element from generate part.  | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 example:  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $util->extract_balanced_html_parts(  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ignore_outside => 1,  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       exclude_border_element => 1,  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       element => 'table',  | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       text => '');  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # returns:  | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # ('abc', 'cde')  | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub extract_balanced_html_parts {  | 
| 
152
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
1150
 | 
     my ($this, %options) = @_;  | 
| 
153
 | 
4
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
21
 | 
     my $level = - ($options{ignore_outside} || 0);  | 
| 
154
 | 
4
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
21
 | 
     my $debug = $options{debug} || 0;  | 
| 
155
 | 
4
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     my $exclude_border_element = $options{exclude_border_element} ? 1 : 0;  | 
| 
156
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     my @ret;  | 
| 
157
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
     my $temp = '';  | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $parser = HTML::Parser->new(  | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	api_version => 3,  | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	start_h => [  | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    sub {  | 
| 
162
 | 
28
 | 
 
 | 
 
 | 
  
28
  
 | 
 
 | 
41
 | 
 		my ($text, $skipped_text) = @_;  | 
| 
163
 | 
28
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
56
 | 
 		$temp .= $skipped_text if $level > 0;  | 
| 
164
 | 
28
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
51
 | 
 		$temp .= $text if $level >= $exclude_border_element;  | 
| 
165
 | 
28
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
63
 | 
 		printf "level/\%02d> \%s (\%s)\n\n", $level, $text,  | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    substr($skipped_text,0,50) if $debug;  | 
| 
167
 | 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
112
 | 
 		++$level;  | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }, "text,skipped_text" ],  | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	end_h => [  | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    sub {  | 
| 
171
 | 
28
 | 
 
 | 
 
 | 
  
28
  
 | 
 
 | 
40
 | 
 		my ($text, $skipped_text) = @_;  | 
| 
172
 | 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
 		$temp .= $skipped_text;  | 
| 
173
 | 
28
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
55
 | 
 		$temp .= $text if $level > $exclude_border_element;  | 
| 
174
 | 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
 		--$level;  | 
| 
175
 | 
28
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
53
 | 
 		printf "level/\%02d< \%s (\%s)\n\n", $level, $text,  | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    substr($skipped_text,0,50) if $debug;  | 
| 
177
 | 
28
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
57
 | 
 		push @ret, $temp if $level == 0;  | 
| 
178
 | 
28
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
138
 | 
 		$temp = '' if $level <= 0;  | 
| 
179
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
50
 | 
 	    }, 'text,skipped_text' ],  | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        );  | 
| 
181
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
232
 | 
     $parser->report_tags($options{element});  | 
| 
182
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
49
 | 
     $parser->parse($options{text});  | 
| 
183
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
78
 | 
     return @ret;  | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item html_attrs_to_hash  | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my %hash = $util->html_attrs_to_hash('href="..."');  | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 or more useful:  | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $case_ignore_hash = $util->generate_ignore_case_hash($util->html_attrs_to_hash('href="..."'));  | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 parse html attributes string to hash.  | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub html_attrs_to_hash {  | 
| 
199
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my ($this, $str) = @_;  | 
| 
200
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $html_attr = $this->regex_parts->{html_attr};  | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     map {  | 
| 
203
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	if (/\A(.+?)=(.*)\z/) {  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
204
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    ($1, $this->unquote($2))  | 
| 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	} else {  | 
| 
206
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    ($_, undef);  | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } ($str =~ /($html_attr)(?:\s+|$)/go);  | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item generate_ignore_case_hash  | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $case_insensitive_hash = $util->generate_ignore_case_hash(%hash);  | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 hash to ignore case hash.  | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub generate_ignore_case_hash {  | 
| 
220
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my $this = shift;  | 
| 
221
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     tie my(%hash), 'Hash::Case::Preserve', keep => 'FIRST';  | 
| 
222
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     %hash = @_;  | 
| 
223
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     \%hash;  | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item generate_case_preserved_hash  | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 obsolete. renamed to generate_ignore_case_hash  | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub generate_case_preserved_hash {  | 
| 
233
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     shift->generate_ignore_case_hash(@_);  | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item copy_hash_val  | 
| 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $util->copy_hash_val(\%src_hash, \%dest_hash, qw(foo bar baz));  | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 or  | 
| 
241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $util->copy_hash_val(\%src_hash, \%dest_hash, [qw(foo bar)], [qw(baz qux)]);  | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 copy hash value on key exist  | 
| 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub copy_hash_val {  | 
| 
249
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my $this = shift;  | 
| 
250
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $src = shift;  | 
| 
251
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $dest = shift;  | 
| 
252
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my ($attr_src, $attr_dest);  | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
254
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     foreach (@_) {  | 
| 
255
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 	if (defined ref && ref eq 'ARRAY') {  | 
| 
256
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    ($attr_src, $attr_dest) = @$_;  | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	} else {  | 
| 
258
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    $attr_src = $attr_dest = $_;  | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
260
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$dest->{$attr_dest} = $src->{$attr_src} if exists $src->{$attr_src};  | 
| 
261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item regex_parts  | 
| 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $util->regex_parts->{$foo};  | 
| 
267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 return some regex parts's hashref.  | 
| 
269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 parts:  | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item non_meta  | 
| 
275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 html non-meta char (not ["'<>]).  | 
| 
277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item non_metas  | 
| 
279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 html non-meta chars ($non_meta*).  | 
| 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item non_meta_spc  | 
| 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 html non-meta-and-spc char (not ["'<> ]).  | 
| 
285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item non_meta_spcs  | 
| 
287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 html non-meta-and-spc chars ($non_meta_spc*).  | 
| 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item non_meta_spc_eq  | 
| 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 html non-meta-and-spc-eq char (not ["'<> =]).  | 
| 
293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item non_meta_spc_eqs  | 
| 
295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 html non-meta-and-spc-eq chars ($non_meta_spc_eq*).  | 
| 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item html_quotedstr_no_paren  | 
| 
299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 html quoted string without grouping paren.  | 
| 
301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item html_quotedstr  | 
| 
303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 html quoted string with grouping.  | 
| 
305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item html_attrval  | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 html attribute value.  | 
| 
309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item html_attr  | 
| 
311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 html attribute  | 
| 
313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item html_maybe_attrs  | 
| 
315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 maybe html attributes found  | 
| 
317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub regex_parts {  | 
| 
323
 | 
15
 | 
 
 | 
 
 | 
  
15
  
 | 
  
1
  
 | 
43
 | 
     return $regex_parts;  | 
| 
324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _init_regex_parts {  | 
| 
327
 | 
3
 | 
 
 | 
  
 50
  
 | 
  
3
  
 | 
 
 | 
31
 | 
     my $parts = $regex_parts ||= {};  | 
| 
328
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
     $$parts{non_meta} =  | 
| 
329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	qr/[^\"\'<>]/o;  | 
| 
330
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
76
 | 
     $$parts{non_metas} =  | 
| 
331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	qr/$$parts{non_meta}*/o;  | 
| 
332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
333
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     $$parts{non_meta_spc} =  | 
| 
334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	qr/[^\"\'<> ]/o;  | 
| 
335
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
67
 | 
     $$parts{non_meta_spcs} =  | 
| 
336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	qr/$$parts{non_meta_spc}*/o;  | 
| 
337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
338
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     $$parts{non_meta_spc_eq} =  | 
| 
339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	qr/[^\"\'<> =]/o;  | 
| 
340
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
65
 | 
     $$parts{non_meta_spc_eqs} =  | 
| 
341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	qr/$$parts{non_meta_spc_eq}*/o;  | 
| 
342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
343
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
     $$parts{html_quotedstr_no_paren} =  | 
| 
344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	qr/"[^"]*"|'[^']*'/o;  | 
| 
345
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
94
 | 
     $$parts{html_quotedstr} =  | 
| 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	qr/(?:$$parts{html_quotedstr_no_paren})/o;  | 
| 
347
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
126
 | 
     $$parts{html_attrval} =  | 
| 
348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	qr/(?:$$parts{html_quotedstr_no_paren}|$$parts{non_meta_spcs})+/o;  | 
| 
349
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
114
 | 
     $$parts{html_attr} =  | 
| 
350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	qr/$$parts{non_meta_spc_eq}+(?:=$$parts{html_attrval})?/o;  | 
| 
351
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
126
 | 
     $$parts{html_maybe_attrs} =  | 
| 
352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	qr/(?:\s+$$parts{html_attr})*/o;  | 
| 
353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item escape  | 
| 
357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $util->escape($str);  | 
| 
359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 equivalent of CGI::escapeHTML.  | 
| 
361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub escape {  | 
| 
365
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
  
1
  
 | 
6347
 | 
     my $this = shift;  | 
| 
366
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
     $_ = shift;  | 
| 
367
 | 
9
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
27
 | 
     return undef unless defined;  | 
| 
368
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
57
 | 
     s/\&(amp|quot|apos|gt|lt);/&$1;/g;  | 
| 
369
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35
 | 
     s/\&(?!(?:[a-zA-Z]+|#\d+|#x[a-f\d]+);)/&/g;  | 
| 
370
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
     s/\"/"/g;  | 
| 
371
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
83
 | 
     s/\'/'/g;  | 
| 
372
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
     s/>/>/g;  | 
| 
373
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
     s/</g;  | 
| 
374
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
     return $_;  | 
| 
375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item unescape  | 
| 
378
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $util->unescape($str);  | 
| 
380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 HTML unescape.  | 
| 
382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub unescape {  | 
| 
386
 | 
24
 | 
 
 | 
 
 | 
  
24
  
 | 
  
1
  
 | 
36
 | 
     my $this = shift;  | 
| 
387
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
42
 | 
     $_ = shift;  | 
| 
388
 | 
24
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
57
 | 
     return undef unless defined;  | 
| 
389
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
57
 | 
     s/"/\"/g;  | 
| 
390
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
43
 | 
     s/'/\'/g;  | 
| 
391
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
46
 | 
     s/>/>/g;  | 
| 
392
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
39
 | 
     s/</
 | 
| 
393
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
     s/&/&/g;  | 
| 
394
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
109
 | 
     return $_;  | 
| 
395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item unquote  | 
| 
398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $util->unquote($str);  | 
| 
400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 HTML unquote.  | 
| 
402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub unquote {  | 
| 
406
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
  
1
  
 | 
2157
 | 
     my $this = shift;  | 
| 
407
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     $_ = shift;  | 
| 
408
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
34
 | 
     if (/\A([\'\"])(.*)\1\Z/) {  | 
| 
409
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
 	$this->unescape($2);  | 
| 
410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# none escaped  | 
| 
412
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$_;  | 
| 
413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item rewrite  | 
| 
417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $util->rewrite($str);  | 
| 
419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 standard rewrite method.  | 
| 
421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 do remove_tag and unescape.  | 
| 
422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
424
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
425
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub rewrite {  | 
| 
426
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
  
1
  
 | 
17
 | 
     my $this = shift;  | 
| 
427
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
     $this->unescape($this->remove_tag(shift));  | 
| 
428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
429
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item please_override_this  | 
| 
431
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   sub foo { shift->please_override_this }  | 
| 
433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 universal 'please override this' error method.  | 
| 
435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub please_override_this {  | 
| 
439
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
     my $this = shift;  | 
| 
440
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     (my $funcname = (caller(1))[3]) =~ s/^.*::(.+?)$/$1/;  | 
| 
441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
442
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
     die sprintf 'please override %s->%s!', (ref $this || $this), $funcname;  | 
| 
443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  |