line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Web::Query; |
2
|
|
|
|
|
|
|
our $AUTHORITY = 'cpan:TOKUHIROM'; |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
$Web::Query::VERSION = '0.39'; |
5
|
46
|
|
|
46
|
|
3474439
|
use strict; |
|
46
|
|
|
|
|
401
|
|
|
46
|
|
|
|
|
1376
|
|
6
|
46
|
|
|
46
|
|
245
|
use warnings; |
|
46
|
|
|
|
|
94
|
|
|
46
|
|
|
|
|
1106
|
|
7
|
46
|
|
|
46
|
|
1233
|
use 5.008001; |
|
46
|
|
|
|
|
166
|
|
8
|
46
|
|
|
46
|
|
19641
|
use parent qw/Exporter/; |
|
46
|
|
|
|
|
12744
|
|
|
46
|
|
|
|
|
255
|
|
9
|
46
|
|
|
46
|
|
27343
|
use HTML::TreeBuilder::XPath; |
|
46
|
|
|
|
|
3069708
|
|
|
46
|
|
|
|
|
404
|
|
10
|
46
|
|
|
46
|
|
32264
|
use LWP::UserAgent; |
|
46
|
|
|
|
|
2051192
|
|
|
46
|
|
|
|
|
2147
|
|
11
|
46
|
|
|
46
|
|
23392
|
use HTML::Selector::XPath 0.20 qw/selector_to_xpath/; |
|
46
|
|
|
|
|
125334
|
|
|
46
|
|
|
|
|
3187
|
|
12
|
46
|
|
|
46
|
|
386
|
use Scalar::Util qw/blessed refaddr/; |
|
46
|
|
|
|
|
121
|
|
|
46
|
|
|
|
|
2793
|
|
13
|
46
|
|
|
46
|
|
316
|
use HTML::Entities qw/encode_entities/; |
|
46
|
|
|
|
|
129
|
|
|
46
|
|
|
|
|
3236
|
|
14
|
|
|
|
|
|
|
|
15
|
46
|
|
|
46
|
|
344
|
use List::Util 1.44 qw/ reduce uniq /; |
|
46
|
|
|
|
|
725
|
|
|
46
|
|
|
|
|
3159
|
|
16
|
46
|
|
|
46
|
|
341
|
use Scalar::Util qw/ refaddr /; |
|
46
|
|
|
|
|
146
|
|
|
46
|
|
|
|
|
86639
|
|
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
our @EXPORT = qw/wq/; |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
our $RESPONSE; |
21
|
|
|
|
|
|
|
|
22
|
90
|
|
|
90
|
1
|
130719
|
sub wq { Web::Query->new(@_) } |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
our $UserAgent = LWP::UserAgent->new(); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
sub __ua { |
27
|
8
|
|
33
|
8
|
|
37
|
$UserAgent ||= LWP::UserAgent->new( agent => __PACKAGE__ . "/" . __PACKAGE__->VERSION ); |
28
|
8
|
|
|
|
|
44
|
$UserAgent; |
29
|
|
|
|
|
|
|
} |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
sub _build_tree { |
32
|
122
|
|
|
122
|
|
274
|
my( $self, $options ) = @_; |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
my $no_space_compacting = ref $self ? $self->{no_space_compacting} |
35
|
122
|
100
|
|
|
|
518
|
: ref $options eq 'HASH' ? $options->{no_space_compacting} : 0; |
|
|
100
|
|
|
|
|
|
36
|
|
|
|
|
|
|
|
37
|
122
|
|
|
|
|
859
|
my $tree = HTML::TreeBuilder::XPath->new( |
38
|
|
|
|
|
|
|
no_space_compacting => $no_space_compacting |
39
|
|
|
|
|
|
|
); |
40
|
122
|
|
|
|
|
35772
|
$tree->ignore_unknown(0); |
41
|
122
|
|
|
|
|
3140
|
$tree->store_comments(1); |
42
|
122
|
|
|
|
|
1122
|
$tree; |
43
|
|
|
|
|
|
|
} |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
sub new { |
46
|
477
|
|
|
477
|
1
|
68868
|
my ($class, $stuff, $options) = @_; |
47
|
|
|
|
|
|
|
|
48
|
477
|
100
|
|
|
|
1232
|
my $self = $class->_resolve_new($stuff,$options) |
49
|
|
|
|
|
|
|
or return undef; |
50
|
|
|
|
|
|
|
|
51
|
474
|
100
|
|
|
|
2211
|
$self->{indent} = $options->{indent} if $options->{indent}; |
52
|
|
|
|
|
|
|
|
53
|
474
|
|
|
|
|
902
|
$self->{no_space_compacting} = $options->{no_space_compacting}; |
54
|
|
|
|
|
|
|
|
55
|
474
|
|
|
|
|
1614
|
return $self; |
56
|
|
|
|
|
|
|
} |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
sub _resolve_new { |
59
|
477
|
|
|
477
|
|
1042
|
my( $class, $stuff, $options) = @_; |
60
|
|
|
|
|
|
|
|
61
|
477
|
100
|
|
|
|
1198
|
return $class->new_from_element([],undef,$options) unless defined $stuff; |
62
|
|
|
|
|
|
|
|
63
|
475
|
100
|
|
|
|
1582
|
if (blessed $stuff) { |
64
|
268
|
100
|
|
|
|
1057
|
return $class->new_from_element([$stuff],undef,$options) |
65
|
|
|
|
|
|
|
if $stuff->isa('HTML::Element'); |
66
|
|
|
|
|
|
|
|
67
|
119
|
100
|
|
|
|
376
|
return $class->new_from_url($stuff->as_string,$options) |
68
|
|
|
|
|
|
|
if $stuff->isa('URI'); |
69
|
|
|
|
|
|
|
|
70
|
117
|
50
|
|
|
|
363
|
return $class->new_from_element($stuff->{trees}, undef, $options) |
71
|
|
|
|
|
|
|
if $stuff->isa($class); |
72
|
|
|
|
|
|
|
|
73
|
0
|
|
|
|
|
0
|
die "Unknown source type: $stuff"; |
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
|
76
|
207
|
100
|
|
|
|
574
|
return $class->new_from_element($stuff,undef,$options) if ref $stuff eq 'ARRAY'; |
77
|
|
|
|
|
|
|
|
78
|
205
|
100
|
|
|
|
793
|
return $class->new_from_url($stuff,$options) if $stuff =~ m{^(?:https?|file)://}; |
79
|
|
|
|
|
|
|
|
80
|
199
|
100
|
|
|
|
1511
|
return $class->new_from_html($stuff,$options) if $stuff =~ /<.*?>/; |
81
|
|
|
|
|
|
|
|
82
|
18
|
50
|
33
|
|
|
552
|
return $class->new_from_file($stuff,$options) if $stuff !~ /\n/ && -f $stuff; |
83
|
|
|
|
|
|
|
|
84
|
0
|
|
|
|
|
0
|
die "Unknown source type: $stuff"; |
85
|
|
|
|
|
|
|
} |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
sub new_from_url { |
88
|
8
|
|
|
8
|
1
|
36
|
my ($class, $url,$options) = @_; |
89
|
|
|
|
|
|
|
|
90
|
8
|
|
|
|
|
37
|
$RESPONSE = __ua()->get($url); |
91
|
|
|
|
|
|
|
|
92
|
8
|
100
|
|
|
|
72563
|
return undef unless $RESPONSE->is_success; |
93
|
|
|
|
|
|
|
|
94
|
5
|
|
|
|
|
75
|
return $class->new_from_html($RESPONSE->decoded_content,$options); |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
sub new_from_file { |
98
|
18
|
|
|
18
|
1
|
63
|
my ($class, $fname, $options) = @_; |
99
|
18
|
|
|
|
|
57
|
my $tree = $class->_build_tree($options); |
100
|
18
|
|
|
|
|
68
|
$tree->parse_file($fname); |
101
|
18
|
|
|
|
|
29595
|
my $self = $class->new_from_element([$tree->disembowel],undef,$options); |
102
|
18
|
|
|
|
|
74
|
$self->{need_delete}++; |
103
|
18
|
|
|
|
|
93
|
return $self; |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
sub new_from_html { |
107
|
218
|
|
|
218
|
1
|
46873
|
my ($class, $html,$options) = @_; |
108
|
218
|
|
|
|
|
661
|
my $tree = $class->_build_tree($options); |
109
|
218
|
|
|
|
|
1290
|
$tree->parse_content($html); |
110
|
|
|
|
|
|
|
my $self = $class->new_from_element([ |
111
|
|
|
|
|
|
|
map { |
112
|
218
|
100
|
|
|
|
173681
|
ref $_ ? $_ : bless { _content => $_ }, 'HTML::TreeBuilder::XPath::TextNode' |
|
237
|
|
|
|
|
18118
|
|
113
|
|
|
|
|
|
|
} $tree->disembowel |
114
|
|
|
|
|
|
|
],undef,$options); |
115
|
218
|
|
|
|
|
934
|
$self->{need_delete}++; |
116
|
218
|
|
|
|
|
2112
|
return $self; |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
sub new_from_element { |
120
|
1268
|
|
|
1268
|
1
|
3925
|
my $self_or_class = shift; |
121
|
|
|
|
|
|
|
|
122
|
1268
|
100
|
|
|
|
3032
|
my $trees = ref $_[0] eq 'ARRAY' ? $_[0] : [$_[0]]; |
123
|
|
|
|
|
|
|
|
124
|
1268
|
|
66
|
|
|
9572
|
return bless { trees => [ @$trees ], before => $_[1] }, |
125
|
|
|
|
|
|
|
ref $self_or_class || $self_or_class; |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
sub end { |
129
|
108
|
|
|
108
|
1
|
362
|
my $self = shift; |
130
|
108
|
|
|
|
|
263
|
return $self->{before}; |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
sub size { |
134
|
142
|
|
|
142
|
1
|
7320
|
my $self = shift; |
135
|
142
|
|
|
|
|
198
|
return scalar(@{$self->{trees}}); |
|
142
|
|
|
|
|
551
|
|
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
sub parent { |
139
|
4
|
|
|
4
|
1
|
8
|
my $self = shift; |
140
|
|
|
|
|
|
|
|
141
|
4
|
|
|
|
|
10
|
my @new = map { $_->parent } @{$self->{trees}}; |
|
4
|
|
|
|
|
13
|
|
|
4
|
|
|
|
|
8
|
|
142
|
|
|
|
|
|
|
|
143
|
4
|
|
33
|
|
|
85
|
return (ref $self || $self)->new_from_element(\@new, $self); |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
sub first { |
147
|
12
|
|
|
12
|
1
|
38
|
my $self = shift; |
148
|
12
|
|
|
|
|
42
|
return $self->eq(0); |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
sub last { |
152
|
6
|
|
|
6
|
1
|
27
|
my $self = shift; |
153
|
6
|
|
|
|
|
17
|
return $self->eq(-1); |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
sub get { |
157
|
89
|
|
|
89
|
1
|
214
|
my ($self, $index) = @_; |
158
|
89
|
|
|
|
|
274
|
return $self->{trees}[$index]; |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
sub eq { |
162
|
26
|
|
|
26
|
0
|
59
|
my ($self, $index) = @_; |
163
|
26
|
|
33
|
|
|
145
|
return (ref $self || $self)->new_from_element([$self->{trees}[$index] || ()], $self); |
|
|
|
66
|
|
|
|
|
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
sub find { |
167
|
295
|
|
|
295
|
1
|
26566
|
my ($self, $selector) = @_; |
168
|
|
|
|
|
|
|
|
169
|
295
|
100
|
|
|
|
1236
|
my $xpath = ref $selector ? $$selector : selector_to_xpath($selector, root => './'); |
170
|
295
|
|
|
|
|
30370
|
my @new = map { eval{ $_->findnodes($xpath) } } @{$self->{trees}}; |
|
311
|
|
|
|
|
28210
|
|
|
311
|
|
|
|
|
1182
|
|
|
295
|
|
|
|
|
824
|
|
171
|
|
|
|
|
|
|
|
172
|
295
|
|
33
|
|
|
525985
|
return (ref $self || $self)->new_from_element(\@new, $self); |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
sub contents { |
176
|
20
|
|
|
20
|
1
|
70
|
my ($self, $selector) = @_; |
177
|
|
|
|
|
|
|
|
178
|
20
|
|
|
|
|
29
|
my @new = map { $_->content_list } @{$self->{trees}}; |
|
28
|
|
|
|
|
290
|
|
|
20
|
|
|
|
|
45
|
|
179
|
|
|
|
|
|
|
|
180
|
20
|
100
|
|
|
|
1917
|
if ($selector) { |
181
|
2
|
50
|
|
|
|
21
|
my $xpath = ref $selector ? $$selector : selector_to_xpath($selector); |
182
|
2
|
|
|
|
|
280
|
@new = grep { $_->matches($xpath) } @new; |
|
6
|
|
|
|
|
2934
|
|
183
|
|
|
|
|
|
|
} |
184
|
|
|
|
|
|
|
|
185
|
20
|
|
33
|
|
|
524
|
return (ref $self || $self)->new_from_element(\@new, $self); |
186
|
|
|
|
|
|
|
} |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
sub as_html { |
189
|
105
|
|
|
105
|
1
|
6053
|
my $self = shift; |
190
|
105
|
|
|
|
|
250
|
my %args = @_; |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
my @html = map { |
193
|
|
|
|
|
|
|
ref $_ ? ( $_->isa('HTML::TreeBuilder::XPath::TextNode') || $_->isa('HTML::TreeBuilder::XPath::CommentNode' ) ) |
194
|
|
|
|
|
|
|
? $_->getValue |
195
|
|
|
|
|
|
|
: $_->as_HTML( q{&<>'"}, $self->{indent}, {} ) |
196
|
148
|
100
|
66
|
|
|
9957
|
: $_ |
|
|
100
|
|
|
|
|
|
197
|
105
|
|
|
|
|
167
|
} @{$self->{trees}}; |
|
105
|
|
|
|
|
257
|
|
198
|
|
|
|
|
|
|
|
199
|
105
|
100
|
|
|
|
27808
|
return join $args{join}, @html if defined $args{join}; |
200
|
|
|
|
|
|
|
|
201
|
85
|
100
|
|
|
|
688
|
return wantarray ? @html : $html[0]; |
202
|
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
sub html { |
205
|
48
|
|
|
48
|
1
|
165
|
my $self = shift; |
206
|
|
|
|
|
|
|
|
207
|
48
|
100
|
|
|
|
125
|
if (@_) { |
208
|
|
|
|
|
|
|
map { |
209
|
4
|
|
|
|
|
20
|
$_->delete_content; |
210
|
4
|
|
|
|
|
59
|
my $tree = $self->_build_tree; |
211
|
|
|
|
|
|
|
|
212
|
4
|
|
|
|
|
25
|
$tree->parse_content($_[0]); |
213
|
4
|
|
|
|
|
1364
|
$_->push_content($tree->disembowel); |
214
|
4
|
|
|
|
|
8
|
} @{$self->{trees}}; |
|
4
|
|
|
|
|
15
|
|
215
|
4
|
|
|
|
|
510
|
return $self; |
216
|
|
|
|
|
|
|
} |
217
|
|
|
|
|
|
|
|
218
|
44
|
|
|
|
|
74
|
my @html; |
219
|
44
|
|
|
|
|
73
|
for my $t ( @{$self->{trees}} ) { |
|
44
|
|
|
|
|
111
|
|
220
|
|
|
|
|
|
|
push @html, join '', map { |
221
|
|
|
|
|
|
|
ref $_ ? $_->as_HTML( q{&<>'"}, $self->{indent}, {}) |
222
|
66
|
100
|
|
|
|
6782
|
: encode_entities($_) |
223
|
44
|
|
|
|
|
74
|
} eval { $t->content_list }; |
|
44
|
|
|
|
|
136
|
|
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
|
226
|
44
|
100
|
|
|
|
12214
|
return wantarray ? @html : $html[0]; |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
sub text { |
230
|
88
|
|
|
88
|
1
|
2188
|
my $self = shift; |
231
|
|
|
|
|
|
|
|
232
|
88
|
100
|
|
|
|
210
|
if (@_) { |
233
|
2
|
|
|
|
|
4
|
map { $_->delete_content; $_->push_content($_[0]) } @{$self->{trees}}; |
|
2
|
|
|
|
|
8
|
|
|
2
|
|
|
|
|
28
|
|
|
2
|
|
|
|
|
5
|
|
234
|
2
|
|
|
|
|
61
|
return $self; |
235
|
|
|
|
|
|
|
} |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
my @html = map { |
238
|
86
|
100
|
|
|
|
376
|
ref $_ ? $_->as_text : $_ |
239
|
86
|
|
|
|
|
131
|
} @{$self->{trees}}; |
|
86
|
|
|
|
|
181
|
|
240
|
86
|
100
|
|
|
|
3133
|
return wantarray ? @html : $html[0]; |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
sub attr { |
244
|
100
|
|
|
100
|
1
|
11936
|
my $self = shift; |
245
|
|
|
|
|
|
|
|
246
|
100
|
100
|
|
|
|
252
|
if ( @_ == 1 ) { |
247
|
|
|
|
|
|
|
return wantarray |
248
|
16
|
|
|
|
|
155
|
? map { $_->attr(@_) } @{$self->{trees}} |
|
10
|
|
|
|
|
26
|
|
249
|
66
|
100
|
|
|
|
153
|
: eval { $self->{trees}[0]->attr(@_) } |
|
56
|
|
|
|
|
197
|
|
250
|
|
|
|
|
|
|
; |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
|
253
|
34
|
|
|
|
|
124
|
while( my( $attr, $value ) = splice @_, 0, 2 ) { |
254
|
36
|
100
|
|
|
|
156
|
my $code = ref $value eq 'CODE' ? $value : undef; |
255
|
|
|
|
|
|
|
|
256
|
36
|
|
|
|
|
49
|
for my $t ( @{$self->{trees}} ) { |
|
36
|
|
|
|
|
80
|
|
257
|
42
|
100
|
|
|
|
214
|
if ( $code ) { |
258
|
46
|
|
|
46
|
|
417
|
no warnings 'uninitialized'; |
|
46
|
|
|
|
|
126
|
|
|
46
|
|
|
|
|
54115
|
|
259
|
4
|
|
|
|
|
16
|
my $orig = $_ = $t->attr($attr); |
260
|
4
|
|
|
|
|
63
|
$code->(); |
261
|
4
|
100
|
|
|
|
23
|
next if $orig eq $_; |
262
|
2
|
|
|
|
|
4
|
$value = $_; |
263
|
|
|
|
|
|
|
} |
264
|
40
|
|
|
|
|
110
|
$t->attr($attr => $value); |
265
|
|
|
|
|
|
|
} |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
|
268
|
34
|
|
|
|
|
1810
|
return $self; |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
sub id { |
272
|
20
|
|
|
20
|
1
|
40
|
my $self = shift; |
273
|
|
|
|
|
|
|
|
274
|
20
|
100
|
|
|
|
43
|
if ( @_ ) { |
275
|
8
|
|
|
|
|
17
|
my $new_id = shift; |
276
|
|
|
|
|
|
|
|
277
|
8
|
50
|
|
|
|
23
|
return $self if $self->size == 0; |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
return $self->each(sub{ |
280
|
6
|
|
|
6
|
|
19
|
$_->attr( id => $new_id->(@_) ) |
281
|
8
|
100
|
|
|
|
45
|
}) if ref $new_id eq 'CODE'; |
282
|
|
|
|
|
|
|
|
283
|
6
|
100
|
|
|
|
14
|
if ( $self->size == 1 ) { |
284
|
2
|
|
|
|
|
9
|
$self->attr( id => $new_id ); |
285
|
|
|
|
|
|
|
} |
286
|
|
|
|
|
|
|
else { |
287
|
4
|
|
|
|
|
16
|
return $self->first->attr( id => $new_id ); |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
} |
290
|
|
|
|
|
|
|
else { |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
return wantarray |
294
|
8
|
|
|
|
|
65
|
? map { $_->attr('id') } @{$self->{trees}} |
|
4
|
|
|
|
|
13
|
|
295
|
12
|
100
|
|
|
|
26
|
: eval { $self->{trees}[0]->attr('id') } |
|
8
|
|
|
|
|
25
|
|
296
|
|
|
|
|
|
|
; |
297
|
|
|
|
|
|
|
} |
298
|
|
|
|
|
|
|
} |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
sub name { |
301
|
12
|
|
|
12
|
1
|
23
|
my $self = shift; |
302
|
12
|
|
|
|
|
26
|
$self->attr( 'name', @_ ); |
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
sub data { |
306
|
4
|
|
|
4
|
1
|
8
|
my $self = shift; |
307
|
4
|
|
|
|
|
8
|
my $name = shift; |
308
|
4
|
|
|
|
|
19
|
$self->attr( join( '-', 'data', $name ), @_ ); |
309
|
|
|
|
|
|
|
} |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
sub tagname { |
312
|
12
|
|
|
12
|
1
|
42
|
my $self = shift; |
313
|
12
|
100
|
|
|
|
91
|
my @retval = map { $_ eq '~comment' ? '#comment' : $_ } |
314
|
12
|
100
|
|
|
|
58
|
map { ref $_ eq 'HTML::TreeBuilder::XPath::TextNode' ? '#text' |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
315
|
|
|
|
|
|
|
: ref $_ eq 'HTML::TreeBuilder::XPath::CommentNode' ? '#comment' |
316
|
|
|
|
|
|
|
: ref $_ ? $_->tag(@_) |
317
|
|
|
|
|
|
|
: '#text' |
318
|
|
|
|
|
|
|
; |
319
|
12
|
|
|
|
|
21
|
} @{$self->{trees}}; |
|
12
|
|
|
|
|
24
|
|
320
|
12
|
50
|
|
|
|
50
|
return wantarray ? @retval : $retval[0]; |
321
|
|
|
|
|
|
|
} |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
sub each { |
324
|
51
|
|
|
51
|
1
|
698
|
my ($self, $code) = @_; |
325
|
51
|
|
|
|
|
85
|
my $i = 0; |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
|
330
|
51
|
|
|
|
|
70
|
my @trees = @{ $self->{trees} }; |
|
51
|
|
|
|
|
123
|
|
331
|
51
|
|
|
|
|
127
|
for my $tree ( @trees ) { |
332
|
140
|
|
33
|
|
|
545
|
local $_ = (ref $self || $self)->new_from_element([$tree], $self); |
333
|
140
|
|
|
|
|
443
|
$code->($i++, $_); |
334
|
|
|
|
|
|
|
} |
335
|
51
|
|
|
|
|
205
|
return $self; |
336
|
|
|
|
|
|
|
} |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
sub map { |
339
|
87
|
|
|
87
|
1
|
221
|
my ($self, $code) = @_; |
340
|
87
|
|
|
|
|
139
|
my $i = 0; |
341
|
|
|
|
|
|
|
return +[map { |
342
|
125
|
|
|
|
|
185
|
my $tree = $_; |
343
|
125
|
|
33
|
|
|
410
|
local $_ = (ref $self || $self)->new($tree); |
344
|
125
|
|
|
|
|
294
|
$code->($i++, $_); |
345
|
87
|
|
|
|
|
136
|
} @{$self->{trees}}]; |
|
87
|
|
|
|
|
201
|
|
346
|
|
|
|
|
|
|
} |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
sub filter { |
349
|
64
|
|
|
64
|
1
|
3051
|
my $self = shift; |
350
|
|
|
|
|
|
|
|
351
|
64
|
|
|
|
|
87
|
my @new; |
352
|
|
|
|
|
|
|
|
353
|
64
|
100
|
|
|
|
161
|
if (ref($_[0]) eq 'CODE') { |
354
|
48
|
|
|
|
|
74
|
my $code = $_[0]; |
355
|
48
|
|
|
|
|
70
|
my $i = 0; |
356
|
|
|
|
|
|
|
@new = grep { |
357
|
78
|
|
|
|
|
116
|
my $tree = $_; |
358
|
78
|
|
33
|
|
|
258
|
local $_ = (ref $self || $self)->new_from_element($tree); |
359
|
78
|
|
|
|
|
192
|
$code->($i++, $_); |
360
|
48
|
|
|
|
|
1122
|
} @{$self->{trees}}; |
|
48
|
|
|
|
|
107
|
|
361
|
|
|
|
|
|
|
} |
362
|
|
|
|
|
|
|
else { |
363
|
16
|
50
|
|
|
|
1085
|
my $xpath = ref $_[0] ? ${$_[0]} : selector_to_xpath($_[0]); |
|
0
|
|
|
|
|
0
|
|
364
|
16
|
|
|
|
|
1611
|
@new = grep { eval { $_->matches($xpath) } } @{$self->{trees}}; |
|
28
|
|
|
|
|
21502
|
|
|
28
|
|
|
|
|
106
|
|
|
16
|
|
|
|
|
45
|
|
365
|
|
|
|
|
|
|
} |
366
|
|
|
|
|
|
|
|
367
|
64
|
|
33
|
|
|
8986
|
return (ref $self || $self)->new_from_element(\@new, $self); |
368
|
|
|
|
|
|
|
} |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
sub _is_same_node { |
371
|
22
|
|
|
22
|
|
84
|
refaddr($_[1]) == refaddr($_[2]); |
372
|
|
|
|
|
|
|
} |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
sub remove { |
375
|
30
|
|
|
30
|
1
|
59
|
my $self = shift; |
376
|
30
|
|
|
|
|
77
|
my $before = $self->end; |
377
|
|
|
|
|
|
|
|
378
|
30
|
|
|
|
|
80
|
while (defined $before) { |
379
|
28
|
|
|
|
|
61
|
@{$before->{trees}} = grep { |
380
|
40
|
|
|
|
|
60
|
my $el = $_; |
381
|
40
|
|
|
|
|
51
|
not grep { $self->_is_same_node( $el, $_ ) } @{$self->{trees}}; |
|
44
|
|
|
|
|
81
|
|
|
40
|
|
|
|
|
62
|
|
382
|
28
|
|
|
|
|
39
|
} @{$before->{trees}}; |
|
28
|
|
|
|
|
55
|
|
383
|
|
|
|
|
|
|
|
384
|
28
|
|
|
|
|
53
|
$before = $before->end; |
385
|
|
|
|
|
|
|
} |
386
|
|
|
|
|
|
|
|
387
|
30
|
|
|
|
|
44
|
$_->delete for @{$self->{trees}}; |
|
30
|
|
|
|
|
106
|
|
388
|
30
|
|
|
|
|
1817
|
@{$self->{trees}} = (); |
|
30
|
|
|
|
|
71
|
|
389
|
|
|
|
|
|
|
|
390
|
30
|
|
|
|
|
100
|
$self; |
391
|
|
|
|
|
|
|
} |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
sub replace_with { |
394
|
10
|
|
|
10
|
1
|
23
|
my ( $self, $replacement ) = @_; |
395
|
|
|
|
|
|
|
|
396
|
10
|
|
|
|
|
15
|
my $i = 0; |
397
|
10
|
|
|
|
|
16
|
for my $node ( @{ $self->{trees} } ) { |
|
10
|
|
|
|
|
23
|
|
398
|
18
|
|
|
|
|
121
|
my $rep = $replacement; |
399
|
|
|
|
|
|
|
|
400
|
18
|
100
|
|
|
|
42
|
if ( ref $rep eq 'CODE' ) { |
401
|
8
|
|
33
|
|
|
27
|
local $_ = (ref $self || $self)->new($node); |
402
|
8
|
|
|
|
|
22
|
$rep = $rep->( $i++ => $_ ); |
403
|
|
|
|
|
|
|
} |
404
|
|
|
|
|
|
|
|
405
|
18
|
100
|
33
|
|
|
65
|
$rep = (ref $self || $self)->new_from_html( $rep ) |
406
|
|
|
|
|
|
|
unless ref $rep; |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
|
410
|
18
|
|
|
|
|
111
|
my $r = $rep->{trees}->[0]; |
411
|
46
|
|
|
46
|
|
696
|
{ no warnings; |
|
46
|
|
|
|
|
227
|
|
|
46
|
|
|
|
|
55845
|
|
|
18
|
|
|
|
|
1060
|
|
412
|
18
|
100
|
|
|
|
66
|
$r = $r->clone if ref $r; |
413
|
|
|
|
|
|
|
} |
414
|
18
|
100
|
66
|
|
|
625
|
$r->parent( $node->parent ) if ref $r and $node->parent; |
415
|
|
|
|
|
|
|
|
416
|
18
|
|
|
|
|
999
|
$node->replace_with( $r ); |
417
|
|
|
|
|
|
|
} |
418
|
|
|
|
|
|
|
|
419
|
10
|
100
|
33
|
|
|
311
|
$replacement->remove if ref $replacement eq (ref $self || $self); |
420
|
|
|
|
|
|
|
|
421
|
10
|
|
|
|
|
34
|
return $self; |
422
|
|
|
|
|
|
|
} |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
sub append { |
425
|
2
|
|
|
2
|
1
|
6
|
my ($self, $stuff) = @_; |
426
|
2
|
|
33
|
|
|
9
|
$stuff = (ref $self || $self)->new($stuff); |
427
|
|
|
|
|
|
|
|
428
|
2
|
|
|
|
|
13
|
foreach my $t (@{$self->{trees}}) { |
|
2
|
|
|
|
|
11
|
|
429
|
4
|
|
|
|
|
209
|
$t->push_content($_) for ref($t)->clone_list(@{$stuff->{trees}}); |
|
4
|
|
|
|
|
16
|
|
430
|
|
|
|
|
|
|
} |
431
|
|
|
|
|
|
|
|
432
|
2
|
|
|
|
|
131
|
$self; |
433
|
|
|
|
|
|
|
} |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
sub prepend { |
436
|
2
|
|
|
2
|
1
|
6
|
my ($self, $stuff) = @_; |
437
|
2
|
|
33
|
|
|
9
|
$stuff = (ref $self || $self)->new($stuff); |
438
|
|
|
|
|
|
|
|
439
|
2
|
|
|
|
|
5
|
foreach my $t (@{$self->{trees}}) { |
|
2
|
|
|
|
|
8
|
|
440
|
4
|
|
|
|
|
216
|
$t->unshift_content($_) for ref($t)->clone_list(@{$stuff->{trees}}); |
|
4
|
|
|
|
|
15
|
|
441
|
|
|
|
|
|
|
} |
442
|
|
|
|
|
|
|
|
443
|
2
|
|
|
|
|
133
|
$self; |
444
|
|
|
|
|
|
|
} |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
sub before { |
448
|
2
|
|
|
2
|
1
|
7
|
my ($self, $stuff) = @_; |
449
|
2
|
|
33
|
|
|
9
|
$stuff = (ref $self || $self)->new($stuff); |
450
|
|
|
|
|
|
|
|
451
|
2
|
|
|
|
|
6
|
foreach my $t (@{$self->{trees}}) { |
|
2
|
|
|
|
|
6
|
|
452
|
4
|
|
|
|
|
244
|
$t->preinsert(ref($t)->clone_list(@{$stuff->{trees}})); |
|
4
|
|
|
|
|
19
|
|
453
|
|
|
|
|
|
|
} |
454
|
|
|
|
|
|
|
|
455
|
2
|
|
|
|
|
154
|
$self; |
456
|
|
|
|
|
|
|
} |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
sub after { |
460
|
2
|
|
|
2
|
1
|
6
|
my ($self, $stuff) = @_; |
461
|
2
|
|
33
|
|
|
8
|
$stuff = (ref $self || $self)->new($stuff); |
462
|
|
|
|
|
|
|
|
463
|
2
|
|
|
|
|
4
|
foreach my $t (@{$self->{trees}}) { |
|
2
|
|
|
|
|
5
|
|
464
|
4
|
|
|
|
|
252
|
$t->postinsert(ref($t)->clone_list(@{$stuff->{trees}})); |
|
4
|
|
|
|
|
16
|
|
465
|
|
|
|
|
|
|
} |
466
|
|
|
|
|
|
|
|
467
|
2
|
|
|
|
|
158
|
$self; |
468
|
|
|
|
|
|
|
} |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
sub insert_before { |
472
|
2
|
|
|
2
|
1
|
5
|
my ($self, $target) = @_; |
473
|
|
|
|
|
|
|
|
474
|
2
|
|
|
|
|
3
|
foreach my $t (@{$target->{trees}}) { |
|
2
|
|
|
|
|
6
|
|
475
|
4
|
|
|
|
|
234
|
$t->preinsert(ref($t)->clone_list(@{$self->{trees}})); |
|
4
|
|
|
|
|
14
|
|
476
|
|
|
|
|
|
|
} |
477
|
|
|
|
|
|
|
|
478
|
2
|
|
|
|
|
145
|
$self; |
479
|
|
|
|
|
|
|
} |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
sub insert_after { |
482
|
2
|
|
|
2
|
1
|
12
|
my ($self, $target) = @_; |
483
|
|
|
|
|
|
|
|
484
|
2
|
|
|
|
|
4
|
foreach my $t (@{$target->{trees}}) { |
|
2
|
|
|
|
|
6
|
|
485
|
4
|
|
|
|
|
251
|
$t->postinsert(ref($t)->clone_list(@{$self->{trees}})); |
|
4
|
|
|
|
|
13
|
|
486
|
|
|
|
|
|
|
} |
487
|
|
|
|
|
|
|
|
488
|
2
|
|
|
|
|
153
|
$self; |
489
|
|
|
|
|
|
|
} |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
sub detach { |
492
|
2
|
|
|
2
|
1
|
6
|
my ($self) = @_; |
493
|
2
|
|
|
|
|
4
|
$_->detach for @{$self->{trees}}; |
|
2
|
|
|
|
|
12
|
|
494
|
2
|
|
|
|
|
221
|
$self; |
495
|
|
|
|
|
|
|
} |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
sub add_class { |
498
|
16
|
|
|
16
|
1
|
37
|
my ($self, $class) = @_; |
499
|
|
|
|
|
|
|
|
500
|
16
|
|
|
|
|
33
|
for (my $i = 0; $i < @{$self->{trees}}; $i++) { |
|
36
|
|
|
|
|
535
|
|
501
|
20
|
|
|
|
|
35
|
my $t = $self->{trees}->[$i]; |
502
|
20
|
|
100
|
|
|
49
|
my $current_class = $t->attr('class') || ''; |
503
|
|
|
|
|
|
|
|
504
|
20
|
100
|
|
|
|
315
|
my $classes = ref $class eq 'CODE' ? $class->($i, $current_class, $t) : $class; |
505
|
20
|
|
|
|
|
101
|
my @classes = split /\s+/, $classes; |
506
|
|
|
|
|
|
|
|
507
|
20
|
|
|
|
|
41
|
foreach (@classes) { |
508
|
32
|
100
|
|
|
|
537
|
$current_class .= " $_" unless $current_class =~ /(?:^|\s)$_(?:\s|$)/; |
509
|
|
|
|
|
|
|
} |
510
|
|
|
|
|
|
|
|
511
|
20
|
|
|
|
|
149
|
$current_class =~ s/(?:^\s*|\s*$)//g; |
512
|
20
|
|
|
|
|
49
|
$current_class =~ s/\s\s+/ /g; |
513
|
|
|
|
|
|
|
|
514
|
20
|
|
|
|
|
52
|
$t->attr('class', $current_class); |
515
|
|
|
|
|
|
|
} |
516
|
|
|
|
|
|
|
|
517
|
16
|
|
|
|
|
44
|
$self; |
518
|
|
|
|
|
|
|
} |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
sub remove_class { |
522
|
16
|
|
|
16
|
1
|
36
|
my ($self, $class) = @_; |
523
|
|
|
|
|
|
|
|
524
|
16
|
|
|
|
|
33
|
for (my $i = 0; $i < @{$self->{trees}}; $i++) { |
|
36
|
|
|
|
|
474
|
|
525
|
20
|
|
|
|
|
32
|
my $t = $self->{trees}->[$i]; |
526
|
20
|
|
|
|
|
49
|
my $current_class = $t->attr('class'); |
527
|
20
|
50
|
|
|
|
284
|
next unless defined $current_class; |
528
|
|
|
|
|
|
|
|
529
|
20
|
100
|
|
|
|
58
|
my $classes = ref $class eq 'CODE' ? $class->($i, $current_class, $t) : $class; |
530
|
20
|
|
|
|
|
78
|
my @remove_classes = split /\s+/, $classes; |
531
|
|
|
|
|
|
|
my @final = grep { |
532
|
20
|
|
|
|
|
90
|
my $existing_class = $_; |
|
42
|
|
|
|
|
55
|
|
533
|
42
|
|
|
|
|
51
|
not grep { $existing_class eq $_} @remove_classes; |
|
66
|
|
|
|
|
142
|
|
534
|
|
|
|
|
|
|
} split /\s+/, $current_class; |
535
|
|
|
|
|
|
|
|
536
|
20
|
|
|
|
|
64
|
$t->attr('class', join ' ', @final); |
537
|
|
|
|
|
|
|
} |
538
|
|
|
|
|
|
|
|
539
|
16
|
|
|
|
|
54
|
$self; |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
} |
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
sub toggle_class { |
544
|
6
|
|
|
6
|
1
|
2692
|
my $self = shift; |
545
|
|
|
|
|
|
|
|
546
|
6
|
|
|
|
|
34
|
my @classes = uniq @_; |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
$self->each(sub{ |
549
|
18
|
|
|
18
|
|
35
|
for my $class ( @classes ) { |
550
|
24
|
100
|
|
|
|
46
|
my $method = $_->has_class($class) ? 'remove_class' : 'add_class'; |
551
|
24
|
|
|
|
|
1612
|
$_->$method($class); |
552
|
|
|
|
|
|
|
} |
553
|
6
|
|
|
|
|
45
|
}); |
554
|
|
|
|
|
|
|
} |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
sub has_class { |
557
|
56
|
|
|
56
|
1
|
158
|
my ($self, $class) = @_; |
558
|
|
|
|
|
|
|
|
559
|
56
|
|
|
|
|
70
|
foreach my $t (@{$self->{trees}}) { |
|
56
|
|
|
|
|
110
|
|
560
|
46
|
|
|
46
|
|
423
|
no warnings 'uninitialized'; |
|
46
|
|
|
|
|
111
|
|
|
46
|
|
|
|
|
58754
|
|
561
|
58
|
100
|
|
|
|
209
|
return 1 if $t->attr('class') =~ /(?:^|\s)$class(?:\s|$)/; |
562
|
|
|
|
|
|
|
} |
563
|
|
|
|
|
|
|
|
564
|
28
|
|
|
|
|
597
|
return undef; |
565
|
|
|
|
|
|
|
} |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
sub clone { |
568
|
2
|
|
|
2
|
1
|
6
|
my ($self) = @_; |
569
|
2
|
|
|
|
|
4
|
my @clones = map { $_->clone } @{$self->{trees}}; |
|
2
|
|
|
|
|
19
|
|
|
2
|
|
|
|
|
4
|
|
570
|
2
|
|
33
|
|
|
200
|
return (ref $self || $self)->new_from_element(\@clones); |
571
|
|
|
|
|
|
|
} |
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
sub add { |
574
|
77
|
|
|
77
|
1
|
170
|
my ($self, @stuff) = @_; |
575
|
77
|
|
|
|
|
104
|
my @nodes; |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
|
578
|
77
|
100
|
100
|
|
|
600
|
if (@stuff == 2 && !ref $stuff[0] && $stuff[1]->isa('HTML::Element')) { |
|
|
|
66
|
|
|
|
|
579
|
2
|
50
|
|
|
|
17
|
my $xpath = ref $stuff[0] ? ${$stuff[0]} : selector_to_xpath($stuff[0]); |
|
0
|
|
|
|
|
0
|
|
580
|
2
|
|
|
|
|
215
|
push @nodes, $stuff[1]->findnodes( $xpath, root => './'); |
581
|
|
|
|
|
|
|
} |
582
|
|
|
|
|
|
|
else { |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
push @nodes, map { |
585
|
77
|
100
|
|
|
|
175
|
$self->{need_delete} = 1 if $_->{need_delete}; |
586
|
77
|
|
|
|
|
123
|
delete $_->{need_delete}; |
587
|
77
|
|
|
|
|
96
|
@{$_->{trees}}; |
|
77
|
|
|
|
|
178
|
|
588
|
75
|
|
33
|
|
|
135
|
} map { (ref $self || $self)->new($_) } @stuff; |
|
77
|
|
|
|
|
256
|
|
589
|
|
|
|
|
|
|
} |
590
|
|
|
|
|
|
|
|
591
|
77
|
|
|
|
|
7175
|
my %ids = map { $self->_node_id($_) => 1 } @{ $self->{trees} }; |
|
120
|
|
|
|
|
256
|
|
|
77
|
|
|
|
|
182
|
|
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
$self->new_from_element( [ |
594
|
77
|
|
|
|
|
152
|
@{$self->{trees}}, grep { ! $ids{ $self->_node_id($_) } } @nodes |
|
77
|
|
|
|
|
143
|
|
|
95
|
|
|
|
|
186
|
|
595
|
|
|
|
|
|
|
], $self ); |
596
|
|
|
|
|
|
|
} |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
sub _node_id { |
599
|
76
|
|
|
76
|
|
117
|
my( undef, $node ) = @_; |
600
|
76
|
|
|
|
|
1133
|
refaddr $node; |
601
|
|
|
|
|
|
|
} |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
sub prev { |
604
|
1
|
|
|
1
|
1
|
3
|
my $self = shift; |
605
|
1
|
|
|
|
|
2
|
my @new; |
606
|
1
|
|
|
|
|
2
|
for my $tree (@{$self->{trees}}) { |
|
1
|
|
|
|
|
3
|
|
607
|
2
|
|
|
|
|
51
|
push @new, $tree->getPreviousSibling; |
608
|
|
|
|
|
|
|
} |
609
|
1
|
|
33
|
|
|
35
|
return (ref $self || $self)->new_from_element(\@new, $self); |
610
|
|
|
|
|
|
|
} |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
sub next { |
613
|
22
|
|
|
22
|
1
|
35
|
my $self = shift; |
614
|
|
|
|
|
|
|
|
615
|
22
|
|
|
|
|
30
|
my @new = grep { $_ } map { $_->getNextSibling } @{ $self->{trees} }; |
|
29
|
|
|
|
|
786
|
|
|
29
|
|
|
|
|
303
|
|
|
22
|
|
|
|
|
38
|
|
616
|
|
|
|
|
|
|
|
617
|
22
|
|
33
|
|
|
84
|
return (ref $self || $self)->new_from_element(\@new, $self); |
618
|
|
|
|
|
|
|
} |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
sub match { |
621
|
73
|
|
|
73
|
1
|
124
|
my( $self, $selector ) = @_; |
622
|
|
|
|
|
|
|
|
623
|
73
|
50
|
|
|
|
210
|
my $xpath = ref $selector ? $$selector : selector_to_xpath($selector); |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
my $results = $self->map(sub{ |
626
|
81
|
|
|
81
|
|
148
|
my(undef,$e) = @_; |
627
|
81
|
50
|
|
|
|
168
|
return 0 unless ref $e; |
628
|
81
|
|
|
|
|
167
|
return !!$e->get(0)->matches($xpath); |
629
|
73
|
|
|
|
|
6347
|
}); |
630
|
|
|
|
|
|
|
|
631
|
73
|
100
|
|
|
|
472
|
return wantarray ? @$results : $results->[0]; |
632
|
|
|
|
|
|
|
} |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
sub not { |
635
|
38
|
|
|
38
|
1
|
75
|
my( $self, $selector ) = @_; |
636
|
|
|
|
|
|
|
|
637
|
38
|
|
|
|
|
66
|
my $class = ref $self; |
638
|
|
|
|
|
|
|
|
639
|
38
|
50
|
|
|
|
115
|
my $xpath = ref $selector ? $$selector : selector_to_xpath($selector); |
640
|
38
|
|
|
46
|
|
3297
|
$self->filter(sub { ! grep { $_->matches($xpath) } grep { ref $_ } $class->new($_)->{trees}[0] } ); |
|
46
|
|
|
|
|
106
|
|
|
46
|
|
|
|
|
139
|
|
|
46
|
|
|
|
|
96
|
|
641
|
|
|
|
|
|
|
} |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
sub and_back { |
644
|
2
|
|
|
2
|
1
|
32
|
my $self = shift; |
645
|
|
|
|
|
|
|
|
646
|
2
|
|
|
|
|
13
|
$self->add( $self->end ); |
647
|
|
|
|
|
|
|
} |
648
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
sub next_until { |
650
|
10
|
|
|
10
|
1
|
24
|
my( $self, $selector ) = @_; |
651
|
|
|
|
|
|
|
|
652
|
10
|
|
|
|
|
16
|
my $class = ref $self; |
653
|
10
|
|
|
|
|
20
|
my $collection = $class->new_from_element([],$self); |
654
|
|
|
|
|
|
|
|
655
|
10
|
|
|
|
|
45
|
my $next = $self->next->not($selector); |
656
|
10
|
|
|
|
|
62
|
while( $next->size ) { |
657
|
26
|
|
|
|
|
57
|
$collection = $collection->add($next); |
658
|
26
|
|
|
|
|
68
|
$next = $next->next->not( $selector ); |
659
|
|
|
|
|
|
|
} |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
|
662
|
10
|
|
|
|
|
25
|
$collection->{before} = $self; |
663
|
|
|
|
|
|
|
|
664
|
10
|
|
|
|
|
18
|
return $collection; |
665
|
|
|
|
|
|
|
} |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
sub split { |
668
|
6
|
|
|
6
|
0
|
21
|
my( $self, $selector, %args ) = @_; |
669
|
|
|
|
|
|
|
|
670
|
6
|
|
|
|
|
9
|
my @current; |
671
|
|
|
|
|
|
|
my @list; |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
$self->contents->each(sub{ |
674
|
69
|
|
|
69
|
|
120
|
my(undef,$e)=@_; |
675
|
|
|
|
|
|
|
|
676
|
69
|
100
|
|
|
|
126
|
if( $e->match($selector) ) { |
677
|
18
|
|
|
|
|
41
|
push @list, [ @current ]; |
678
|
18
|
|
|
|
|
63
|
@current = ( $e ); |
679
|
|
|
|
|
|
|
} |
680
|
|
|
|
|
|
|
else { |
681
|
51
|
100
|
|
|
|
102
|
if ( $current[1] ) { |
682
|
30
|
|
|
|
|
80
|
$current[1] = $current[1]->add($e); |
683
|
|
|
|
|
|
|
} |
684
|
|
|
|
|
|
|
else { |
685
|
21
|
|
|
|
|
53
|
$current[1] = $e; |
686
|
|
|
|
|
|
|
} |
687
|
|
|
|
|
|
|
} |
688
|
6
|
|
|
|
|
17
|
}); |
689
|
6
|
|
|
|
|
44
|
push @list, [ @current ]; |
690
|
|
|
|
|
|
|
|
691
|
6
|
100
|
|
|
|
17
|
if( $args{skip_leading} ) { |
692
|
2
|
|
|
|
|
5
|
@list = grep { $_->[0] } @list; |
|
8
|
|
|
|
|
17
|
|
693
|
|
|
|
|
|
|
} |
694
|
|
|
|
|
|
|
|
695
|
6
|
100
|
|
|
|
15
|
unless ( $args{pairs} ) { |
696
|
2
|
|
|
5
|
|
7
|
@list = map { reduce { $a->add($b) } grep { $_ } @$_ } @list; |
|
8
|
|
|
|
|
29
|
|
|
5
|
|
|
|
|
12
|
|
|
15
|
|
|
|
|
53
|
|
697
|
|
|
|
|
|
|
} |
698
|
|
|
|
|
|
|
|
699
|
6
|
|
|
|
|
21
|
return @list; |
700
|
|
|
|
|
|
|
} |
701
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
sub last_response { |
703
|
4
|
|
|
4
|
1
|
3418
|
return $RESPONSE; |
704
|
|
|
|
|
|
|
} |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
sub DESTROY { |
707
|
1269
|
100
|
|
1269
|
|
282917
|
return unless $_[0]->{need_delete}; |
708
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
|
710
|
237
|
|
|
|
|
430
|
local $@; |
711
|
237
|
|
|
|
|
384
|
eval { $_->delete } for @{$_[0]->{trees}}; |
|
237
|
|
|
|
|
770
|
|
|
254
|
|
|
|
|
1424
|
|
712
|
|
|
|
|
|
|
} |
713
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
1; |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
__END__ |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
=pod |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
=encoding UTF-8 |
721
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
=head1 NAME |
723
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
Web::Query - Yet another scraping library like jQuery |
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
=head1 VERSION |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
version 0.39 |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
=head1 SYNOPSIS |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
use Web::Query; |
733
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
wq('http://www.w3.org/TR/html401/') |
735
|
|
|
|
|
|
|
->find('div.head dt') |
736
|
|
|
|
|
|
|
->each(sub { |
737
|
|
|
|
|
|
|
my $i = shift; |
738
|
|
|
|
|
|
|
printf("%d %s\n", $i+1, $_->text); |
739
|
|
|
|
|
|
|
}); |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
=head1 DESCRIPTION |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
Web::Query is a yet another scraping framework, have a jQuery like interface. |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
Yes, I know Ingy's L<pQuery>. But it's just a alpha quality. It doesn't works. |
746
|
|
|
|
|
|
|
Web::Query built at top of the CPAN modules, L<HTML::TreeBuilder::XPath>, L<LWP::UserAgent>, and L<HTML::Selector::XPath>. |
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
So, this module uses L<HTML::Selector::XPath> and only supports the CSS 3 |
749
|
|
|
|
|
|
|
selector supported by that module. |
750
|
|
|
|
|
|
|
Web::Query doesn't support jQuery's extended queries(yet?). If a selector is |
751
|
|
|
|
|
|
|
passed as a scalar ref, it'll be taken as a straight XPath expression. |
752
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
$wq( '<div><p>hello</p><p>there</p></div>' )->find( 'p' ); # css selector |
754
|
|
|
|
|
|
|
$wq( '<div><p>hello</p><p>there</p></div>' )->find( \'/div/p' ); # xpath selector |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
B<THIS LIBRARY IS UNDER DEVELOPMENT. ANY API MAY CHANGE WITHOUT NOTICE>. |
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
=for stopwords prev |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
=head1 FUNCTIONS |
761
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
=over 4 |
763
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
=item C<< wq($stuff) >> |
765
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
This is a shortcut for C<< Web::Query->new($stuff) >>. This function is exported by default. |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
=back |
769
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
=head1 METHODS |
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
=head2 CONSTRUCTORS |
773
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
=over 4 |
775
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
=item my $q = Web::Query->new($stuff, \%options ) |
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
Create new instance of Web::Query. You can make the instance from URL(http, https, file scheme), HTML in string, URL in string, L<URI> object, C<undef>, and either one |
779
|
|
|
|
|
|
|
L<HTML::Element> object or an array ref of them. |
780
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
# all valid creators |
782
|
|
|
|
|
|
|
$q = Web::Query->new( 'http://techblog.babyl.ca' ); |
783
|
|
|
|
|
|
|
$q = Web::Query->new( '<p>foo</p>' ); |
784
|
|
|
|
|
|
|
$q = Web::Query->new( undef ); |
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
This method throw the exception on unknown $stuff. |
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
This method returns undefined value on non-successful response with URL. |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
Currently, the only two valid options are I<indent>, which will be used as |
791
|
|
|
|
|
|
|
the indentation string if the object is printed, and I<no_space_compacting>, |
792
|
|
|
|
|
|
|
which will prevent the compaction of whitespace characters in text blocks. |
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
=item my $q = Web::Query->new_from_element($element: HTML::Element) |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
Create new instance of Web::Query from instance of L<HTML::Element>. |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
=item C<< my $q = Web::Query->new_from_html($html: Str) >> |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
Create new instance of Web::Query from HTML. |
801
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
=item my $q = Web::Query->new_from_url($url: Str) |
803
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
Create new instance of Web::Query from URL. |
805
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
If the response is not success(It means /^20[0-9]$/), this method returns undefined value. |
807
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
You can get a last result of response, use the C<< $Web::Query::RESPONSE >>. |
809
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
Here is a best practical code: |
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
my $url = 'http://example.com/'; |
813
|
|
|
|
|
|
|
my $q = Web::Query->new_from_url($url) |
814
|
|
|
|
|
|
|
or die "Cannot get a resource from $url: " . Web::Query->last_response()->status_line; |
815
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
=item my $q = Web::Query->new_from_file($file_name: Str) |
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
Create new instance of Web::Query from file name. |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
=back |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
=head2 TRAVERSING |
823
|
|
|
|
|
|
|
|
824
|
|
|
|
|
|
|
=head3 add |
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
Returns a new object augmented with the new element(s). |
827
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
=over 4 |
829
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
=item add($html) |
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
An HTML fragment to add to the set of matched elements. |
833
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
=item add(@elements) |
835
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
One or more @elements to add to the set of matched elements. |
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
@elements that already are part of the set are not added a second time. |
839
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
my $group = $wq->find('#foo'); # collection has 1 element |
841
|
|
|
|
|
|
|
$group = $group->add( '#bar', $wq ); # 2 elements |
842
|
|
|
|
|
|
|
$group->add( '#foo', $wq ); # still 2 elements |
843
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
=item add($wq) |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
An existing Web::Query object to add to the set of matched elements. |
847
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
=item add($selector, $context) |
849
|
|
|
|
|
|
|
|
850
|
|
|
|
|
|
|
$selector is a string representing a selector expression to find additional elements to add to the set of matched elements. |
851
|
|
|
|
|
|
|
|
852
|
|
|
|
|
|
|
$context is the point in the document at which the selector should begin matching |
853
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
=back |
855
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
=head3 contents |
857
|
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
Get the immediate children of each element in the set of matched elements, including text and comment nodes. |
859
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
=head3 each |
861
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
Visit each nodes. C<< $i >> is a counter value, 0 origin. C<< $elem >> is iteration item. |
863
|
|
|
|
|
|
|
C<< $_ >> is localized by C<< $elem >>. |
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
$q->each(sub { my ($i, $elem) = @_; ... }) |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
=head3 end |
868
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
Back to the before context like jQuery. |
870
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
=head3 filter |
872
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
Reduce the elements to those that pass the function's test. |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
$q->filter(sub { my ($i, $elem) = @_; ... }) |
876
|
|
|
|
|
|
|
|
877
|
|
|
|
|
|
|
=head3 find |
878
|
|
|
|
|
|
|
|
879
|
|
|
|
|
|
|
Get the descendants of each element in the current set of matched elements, filtered by a selector. |
880
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
my $q2 = $q->find($selector); # $selector is a CSS3 selector. |
882
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
B<NOTE> If you want to match the element itself, use L</filter>. |
884
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
B<INCOMPATIBLE CHANGE> |
886
|
|
|
|
|
|
|
From v0.14 to v0.19 (inclusive) find() also matched the element itself, which is not jQuery compatible. |
887
|
|
|
|
|
|
|
You can achieve that result using C<filter()>, C<add()> and C<find()>: |
888
|
|
|
|
|
|
|
|
889
|
|
|
|
|
|
|
my $wq = wq('<div class="foo"><p class="foo">bar</p></div>'); # needed because we don't have a global document like jQuery does |
890
|
|
|
|
|
|
|
print $wq->filter('.foo')->add($wq->find('.foo'))->as_html; # <div class="foo"><p class="foo">bar</p></div><p class="foo">bar</p> |
891
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
=head3 first |
893
|
|
|
|
|
|
|
|
894
|
|
|
|
|
|
|
Return the first matching element. |
895
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
This method constructs a new Web::Query object from the first matching element. |
897
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
=head3 last |
899
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
Return the last matching element. |
901
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
This method constructs a new Web::Query object from the last matching element. |
903
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
=head3 match($selector) |
905
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
Returns a boolean indicating if the elements match the C<$selector>. |
907
|
|
|
|
|
|
|
|
908
|
|
|
|
|
|
|
In scalar context returns only the boolean for the first element. |
909
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
For the reverse of C<not()>, see C<filter()>. |
911
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
=head3 not($selector) |
913
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
Returns all the elements not matching the C<$selector>. |
915
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
# $do_for_love will be every thing, except #that |
917
|
|
|
|
|
|
|
my $do_for_love = $wq->find('thing')->not('#that'); |
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
=head3 and_back |
920
|
|
|
|
|
|
|
|
921
|
|
|
|
|
|
|
Add the previous set of elements to the current one. |
922
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
# get the h1 plus everything until the next h1 |
924
|
|
|
|
|
|
|
$wq->find('h1')->next_until('h1')->and_back; |
925
|
|
|
|
|
|
|
|
926
|
|
|
|
|
|
|
=head3 map |
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
Creates a new array with the results of calling a provided function on every element. |
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
$q->map(sub { my ($i, $elem) = @_; ... }) |
931
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
=head3 parent |
933
|
|
|
|
|
|
|
|
934
|
|
|
|
|
|
|
Get the parent of each element in the current set of matched elements. |
935
|
|
|
|
|
|
|
|
936
|
|
|
|
|
|
|
=head3 prev |
937
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
Get the previous node of each element in the current set of matched elements. |
939
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
my $prev = $q->prev; |
941
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
=head3 next |
943
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
Get the next node of each element in the current set of matched elements. |
945
|
|
|
|
|
|
|
|
946
|
|
|
|
|
|
|
my $next = $q->next; |
947
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
=head3 next_until( $selector ) |
949
|
|
|
|
|
|
|
|
950
|
|
|
|
|
|
|
Get all subsequent siblings, up to (but not including) the next node matched C<$selector>. |
951
|
|
|
|
|
|
|
|
952
|
|
|
|
|
|
|
=head2 MANIPULATION |
953
|
|
|
|
|
|
|
|
954
|
|
|
|
|
|
|
=head3 add_class |
955
|
|
|
|
|
|
|
|
956
|
|
|
|
|
|
|
Adds the specified class(es) to each of the set of matched elements. |
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
# add class 'foo' to <p> elements |
959
|
|
|
|
|
|
|
wq('<div><p>foo</p><p>bar</p></div>')->find('p')->add_class('foo'); |
960
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
=head3 toggle_class( @classes ) |
962
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
Toggles the given class or classes on each of the element. I.e., if the element had the class, it'll be removed, |
964
|
|
|
|
|
|
|
and if it hadn't, it'll be added. |
965
|
|
|
|
|
|
|
|
966
|
|
|
|
|
|
|
Classes are toggled once, no matter how many times they appear in the argument list. |
967
|
|
|
|
|
|
|
|
968
|
|
|
|
|
|
|
$q->toggle_class( 'foo', 'foo', 'bar' ); |
969
|
|
|
|
|
|
|
|
970
|
|
|
|
|
|
|
# equivalent to |
971
|
|
|
|
|
|
|
|
972
|
|
|
|
|
|
|
$q->toggle_class('foo')->toggle_class('bar'); |
973
|
|
|
|
|
|
|
|
974
|
|
|
|
|
|
|
# and not |
975
|
|
|
|
|
|
|
|
976
|
|
|
|
|
|
|
$q->toggle_class('foo')->toggle_class('foo')->toggle_class('bar'); |
977
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
=head3 after |
979
|
|
|
|
|
|
|
|
980
|
|
|
|
|
|
|
Insert content, specified by the parameter, after each element in the set of matched elements. |
981
|
|
|
|
|
|
|
|
982
|
|
|
|
|
|
|
wq('<div><p>foo</p></div>')->find('p') |
983
|
|
|
|
|
|
|
->after('<b>bar</b>') |
984
|
|
|
|
|
|
|
->end |
985
|
|
|
|
|
|
|
->as_html; # <div><p>foo</p><b>bar</b></div> |
986
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
The content can be anything accepted by L</new>. |
988
|
|
|
|
|
|
|
|
989
|
|
|
|
|
|
|
=head3 append |
990
|
|
|
|
|
|
|
|
991
|
|
|
|
|
|
|
Insert content, specified by the parameter, to the end of each element in the set of matched elements. |
992
|
|
|
|
|
|
|
|
993
|
|
|
|
|
|
|
wq('<div></div>')->append('<p>foo</p>')->as_html; # <div><p>foo</p></div> |
994
|
|
|
|
|
|
|
|
995
|
|
|
|
|
|
|
The content can be anything accepted by L</new>. |
996
|
|
|
|
|
|
|
|
997
|
|
|
|
|
|
|
=head3 as_html |
998
|
|
|
|
|
|
|
|
999
|
|
|
|
|
|
|
Returns the string representations of either the first or all elements, |
1000
|
|
|
|
|
|
|
depending if called in list or scalar context. |
1001
|
|
|
|
|
|
|
|
1002
|
|
|
|
|
|
|
If given an argument C<join>, the string representations of the elements |
1003
|
|
|
|
|
|
|
will be concatenated with the given string. |
1004
|
|
|
|
|
|
|
|
1005
|
|
|
|
|
|
|
wq( '<div><p>foo</p><p>bar</p></div>' ) |
1006
|
|
|
|
|
|
|
->find('p') |
1007
|
|
|
|
|
|
|
->as_html( join => '!' ); |
1008
|
|
|
|
|
|
|
# <p>foo</p>!<p>bar</p> |
1009
|
|
|
|
|
|
|
|
1010
|
|
|
|
|
|
|
=head3 C< attr > |
1011
|
|
|
|
|
|
|
|
1012
|
|
|
|
|
|
|
Get/set attribute values. |
1013
|
|
|
|
|
|
|
|
1014
|
|
|
|
|
|
|
In getter mode, it'll return either the values of the attribute |
1015
|
|
|
|
|
|
|
for all elements of the set, or only the first one depending of the calling context. |
1016
|
|
|
|
|
|
|
|
1017
|
|
|
|
|
|
|
my @values = $q->attr('style'); # style of all elements |
1018
|
|
|
|
|
|
|
my $first_value = $q->attr('style'); # style of first element |
1019
|
|
|
|
|
|
|
|
1020
|
|
|
|
|
|
|
In setter mode, it'll set attributes value for all elements, and return back |
1021
|
|
|
|
|
|
|
the original object for easy chaining. |
1022
|
|
|
|
|
|
|
|
1023
|
|
|
|
|
|
|
$q->attr( 'alt' => 'a picture' )->find( ... ); |
1024
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
# can pass more than 1 element too |
1026
|
|
|
|
|
|
|
$q->attr( alt => 'a picture', src => 'file:///...' ); |
1027
|
|
|
|
|
|
|
|
1028
|
|
|
|
|
|
|
The value passed for an attribute can be a code ref. In that case, |
1029
|
|
|
|
|
|
|
the code will be called with C<$_> set to the current attribute value. |
1030
|
|
|
|
|
|
|
If the code modifies C<$_>, the attribute will be updated with the new value. |
1031
|
|
|
|
|
|
|
|
1032
|
|
|
|
|
|
|
$q->attr( alt => sub { $_ ||= 'A picture' } ); |
1033
|
|
|
|
|
|
|
|
1034
|
|
|
|
|
|
|
=head3 C< id > |
1035
|
|
|
|
|
|
|
|
1036
|
|
|
|
|
|
|
Get/set the elements's id attribute. |
1037
|
|
|
|
|
|
|
|
1038
|
|
|
|
|
|
|
In getter mode, it behaves just like C<attr()>. |
1039
|
|
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
In setter mode, it behaves like C<attr()>, but with the following exceptions. |
1041
|
|
|
|
|
|
|
|
1042
|
|
|
|
|
|
|
If the attribute value is a scalar, it'll be only assigned to |
1043
|
|
|
|
|
|
|
the first element of the set (as ids are supposed to be unique), and the returned object will only contain |
1044
|
|
|
|
|
|
|
that first element. |
1045
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
my $first_element = $q->id('the_one'); |
1047
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
It's possible to set the ids of all the elements by passing a sub to C<id()>. The sub is given the same arguments as for |
1049
|
|
|
|
|
|
|
C<each()>, and its return value is taken to be the new id of the elements. |
1050
|
|
|
|
|
|
|
|
1051
|
|
|
|
|
|
|
$q->id( sub { my $i = shift; 'foo_' . $i } ); |
1052
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
=head3 C< name > |
1054
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
Get/set the elements's 'name' attribute. |
1056
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
my $name = $q->name; # equivalent to $q->attr( 'name' ); |
1058
|
|
|
|
|
|
|
|
1059
|
|
|
|
|
|
|
$q->name( 'foo' ); # equivalent to $q->attr( name => 'foo' ); |
1060
|
|
|
|
|
|
|
|
1061
|
|
|
|
|
|
|
=head3 C< data > |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
Get/set the elements's 'data-*name*' attributes. |
1064
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
my $data = $q->data('foo'); # equivalent to $q->attr( 'data-foo' ); |
1066
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
$q->data( 'foo' => 'bar' ); # equivalent to $q->attr( 'data-foo' => 'bar' ); |
1068
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
=head3 tagname |
1070
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
Get/Set the tag name of elements. |
1072
|
|
|
|
|
|
|
|
1073
|
|
|
|
|
|
|
my $name = $q->tagname; |
1074
|
|
|
|
|
|
|
|
1075
|
|
|
|
|
|
|
$q->tagname($new_name); |
1076
|
|
|
|
|
|
|
|
1077
|
|
|
|
|
|
|
=head3 before |
1078
|
|
|
|
|
|
|
|
1079
|
|
|
|
|
|
|
Insert content, specified by the parameter, before each element in the set of matched elements. |
1080
|
|
|
|
|
|
|
|
1081
|
|
|
|
|
|
|
wq('<div><p>foo</p></div>')->find('p') |
1082
|
|
|
|
|
|
|
->before('<b>bar</b>') |
1083
|
|
|
|
|
|
|
->end |
1084
|
|
|
|
|
|
|
->as_html; # <div><b>bar</b><p>foo</p></div> |
1085
|
|
|
|
|
|
|
|
1086
|
|
|
|
|
|
|
The content can be anything accepted by L</new>. |
1087
|
|
|
|
|
|
|
|
1088
|
|
|
|
|
|
|
=head3 clone |
1089
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
Create a deep copy of the set of matched elements. |
1091
|
|
|
|
|
|
|
|
1092
|
|
|
|
|
|
|
=head3 detach |
1093
|
|
|
|
|
|
|
|
1094
|
|
|
|
|
|
|
Remove the set of matched elements from the DOM. |
1095
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
=head3 has_class |
1097
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
Determine whether any of the matched elements are assigned the given class. |
1099
|
|
|
|
|
|
|
|
1100
|
|
|
|
|
|
|
=head3 C< html > |
1101
|
|
|
|
|
|
|
|
1102
|
|
|
|
|
|
|
Get/Set the innerHTML. |
1103
|
|
|
|
|
|
|
|
1104
|
|
|
|
|
|
|
my @html = $q->html(); |
1105
|
|
|
|
|
|
|
|
1106
|
|
|
|
|
|
|
my $html = $q->html(); # 1st matching element only |
1107
|
|
|
|
|
|
|
|
1108
|
|
|
|
|
|
|
$q->html('<p>foo</p>'); |
1109
|
|
|
|
|
|
|
|
1110
|
|
|
|
|
|
|
=head3 insert_before |
1111
|
|
|
|
|
|
|
|
1112
|
|
|
|
|
|
|
Insert every element in the set of matched elements before the target. |
1113
|
|
|
|
|
|
|
|
1114
|
|
|
|
|
|
|
=head3 insert_after |
1115
|
|
|
|
|
|
|
|
1116
|
|
|
|
|
|
|
Insert every element in the set of matched elements after the target. |
1117
|
|
|
|
|
|
|
|
1118
|
|
|
|
|
|
|
=head3 C< prepend > |
1119
|
|
|
|
|
|
|
|
1120
|
|
|
|
|
|
|
Insert content, specified by the parameter, to the beginning of each element in the set of matched elements. |
1121
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
=head3 remove |
1123
|
|
|
|
|
|
|
|
1124
|
|
|
|
|
|
|
Delete the elements associated with the object from the DOM. |
1125
|
|
|
|
|
|
|
|
1126
|
|
|
|
|
|
|
# remove all <blink> tags from the document |
1127
|
|
|
|
|
|
|
$q->find('blink')->remove; |
1128
|
|
|
|
|
|
|
|
1129
|
|
|
|
|
|
|
=head3 remove_class |
1130
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
Remove a single class, multiple classes, or all classes from each element in the set of matched elements. |
1132
|
|
|
|
|
|
|
|
1133
|
|
|
|
|
|
|
=head3 replace_with |
1134
|
|
|
|
|
|
|
|
1135
|
|
|
|
|
|
|
Replace the elements of the object with the provided replacement. |
1136
|
|
|
|
|
|
|
The replacement can be a string, a C<Web::Query> object or an |
1137
|
|
|
|
|
|
|
anonymous function. The anonymous function is passed the index of the current |
1138
|
|
|
|
|
|
|
node and the node itself (with is also localized as C<$_>). |
1139
|
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
my $q = wq( '<p><b>Abra</b><i>cada</i><u>bra</u></p>' ); |
1141
|
|
|
|
|
|
|
|
1142
|
|
|
|
|
|
|
$q->find('b')->replace_with('<a>Ocus</a>); |
1143
|
|
|
|
|
|
|
# <p><a>Ocus</a><i>cada</i><u>bra</u></p> |
1144
|
|
|
|
|
|
|
|
1145
|
|
|
|
|
|
|
$q->find('u')->replace_with($q->find('b')); |
1146
|
|
|
|
|
|
|
# <p><i>cada</i><b>Abra</b></p> |
1147
|
|
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
$q->find('i')->replace_with(sub{ |
1149
|
|
|
|
|
|
|
my $name = $_->text; |
1150
|
|
|
|
|
|
|
return "<$name></$name>"; |
1151
|
|
|
|
|
|
|
}); |
1152
|
|
|
|
|
|
|
# <p><b>Abra</b><cada></cada><u>bra</u></p> |
1153
|
|
|
|
|
|
|
|
1154
|
|
|
|
|
|
|
=head3 size |
1155
|
|
|
|
|
|
|
|
1156
|
|
|
|
|
|
|
Return the number of elements in the Web::Query object. |
1157
|
|
|
|
|
|
|
|
1158
|
|
|
|
|
|
|
wq('<div><p>foo</p><p>bar</p></div>')->find('p')->size; # 2 |
1159
|
|
|
|
|
|
|
|
1160
|
|
|
|
|
|
|
=head3 text |
1161
|
|
|
|
|
|
|
|
1162
|
|
|
|
|
|
|
Get/Set the text. |
1163
|
|
|
|
|
|
|
|
1164
|
|
|
|
|
|
|
my @text = $q->text(); |
1165
|
|
|
|
|
|
|
|
1166
|
|
|
|
|
|
|
my $text = $q->text(); # 1st matching element only |
1167
|
|
|
|
|
|
|
|
1168
|
|
|
|
|
|
|
$q->text('text'); |
1169
|
|
|
|
|
|
|
|
1170
|
|
|
|
|
|
|
If called in a scalar context, only return the string representation |
1171
|
|
|
|
|
|
|
of the first element |
1172
|
|
|
|
|
|
|
|
1173
|
|
|
|
|
|
|
=head2 OTHERS |
1174
|
|
|
|
|
|
|
|
1175
|
|
|
|
|
|
|
=over 4 |
1176
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
=item Web::Query->last_response() |
1178
|
|
|
|
|
|
|
|
1179
|
|
|
|
|
|
|
Returns last HTTP response status that generated by C<new_from_url()>. |
1180
|
|
|
|
|
|
|
|
1181
|
|
|
|
|
|
|
=back |
1182
|
|
|
|
|
|
|
|
1183
|
|
|
|
|
|
|
=head1 HOW DO I CUSTOMIZE USER AGENT? |
1184
|
|
|
|
|
|
|
|
1185
|
|
|
|
|
|
|
You can specify your own instance of L<LWP::UserAgent>. |
1186
|
|
|
|
|
|
|
|
1187
|
|
|
|
|
|
|
$Web::Query::UserAgent = LWP::UserAgent->new( agent => 'Mozilla/5.0' ); |
1188
|
|
|
|
|
|
|
|
1189
|
|
|
|
|
|
|
=head1 FAQ AND TROUBLESHOOTING |
1190
|
|
|
|
|
|
|
|
1191
|
|
|
|
|
|
|
=head2 How to find XML processing instructions in a document? |
1192
|
|
|
|
|
|
|
|
1193
|
|
|
|
|
|
|
It's possible with L<Web::Query::LibXML> and by using an xpath expression |
1194
|
|
|
|
|
|
|
with C<find()>: |
1195
|
|
|
|
|
|
|
|
1196
|
|
|
|
|
|
|
# find <?xml-stylesheet ... ?> |
1197
|
|
|
|
|
|
|
$q->find(\"//processing-instruction('xml-stylesheet')"); |
1198
|
|
|
|
|
|
|
|
1199
|
|
|
|
|
|
|
However, note that the support for processing instructions |
1200
|
|
|
|
|
|
|
in L<HTML::TreeBuilder::LibXML::Node> is sketchy, so there |
1201
|
|
|
|
|
|
|
are methods like C<attr()> that won't work. |
1202
|
|
|
|
|
|
|
|
1203
|
|
|
|
|
|
|
=head2 Can't get the content of script elements |
1204
|
|
|
|
|
|
|
|
1205
|
|
|
|
|
|
|
The <script> tag is treated differently by L<HTML::TreeBuilder>, the |
1206
|
|
|
|
|
|
|
parser used by Web::Query. To retrieve the content, you can use either |
1207
|
|
|
|
|
|
|
the method C<html()> (with the caveat that the content will be escaped), |
1208
|
|
|
|
|
|
|
or use L<Web::Query::LibXML>, which parse the 'script' element differently. |
1209
|
|
|
|
|
|
|
|
1210
|
|
|
|
|
|
|
my $node = "<script>var x = '<p>foo</p>';</script>"; |
1211
|
|
|
|
|
|
|
|
1212
|
|
|
|
|
|
|
say Web::Query::wq( $node )->text; |
1213
|
|
|
|
|
|
|
# nothing is printed! |
1214
|
|
|
|
|
|
|
|
1215
|
|
|
|
|
|
|
say Web::Query::wq( $node )->html; |
1216
|
|
|
|
|
|
|
# var x = '<p>foo</p>'; |
1217
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
say Web::Query::LibXML::wq( $node )->text; |
1219
|
|
|
|
|
|
|
# var x = '<p>foo</p>'; |
1220
|
|
|
|
|
|
|
|
1221
|
|
|
|
|
|
|
say Web::Query::LibXML::wq( $node )->html; |
1222
|
|
|
|
|
|
|
# var x = '<p>foo</p>'; |
1223
|
|
|
|
|
|
|
|
1224
|
|
|
|
|
|
|
=head1 INCOMPATIBLE CHANGES |
1225
|
|
|
|
|
|
|
|
1226
|
|
|
|
|
|
|
=over 4 |
1227
|
|
|
|
|
|
|
|
1228
|
|
|
|
|
|
|
=item 0.10 |
1229
|
|
|
|
|
|
|
|
1230
|
|
|
|
|
|
|
new_from_url() is no longer throws exception on bad response from HTTP server. |
1231
|
|
|
|
|
|
|
|
1232
|
|
|
|
|
|
|
=back |
1233
|
|
|
|
|
|
|
|
1234
|
|
|
|
|
|
|
=head1 AUTHOR |
1235
|
|
|
|
|
|
|
|
1236
|
|
|
|
|
|
|
Tokuhiro Matsuno E<lt>tokuhirom AAJKLFJEF@ GMAIL COME<gt> |
1237
|
|
|
|
|
|
|
|
1238
|
|
|
|
|
|
|
=head1 SEE ALSO |
1239
|
|
|
|
|
|
|
|
1240
|
|
|
|
|
|
|
=over |
1241
|
|
|
|
|
|
|
|
1242
|
|
|
|
|
|
|
=item L<pQuery> |
1243
|
|
|
|
|
|
|
|
1244
|
|
|
|
|
|
|
=item L<XML::LibXML::jQuery> |
1245
|
|
|
|
|
|
|
|
1246
|
|
|
|
|
|
|
=back |
1247
|
|
|
|
|
|
|
|
1248
|
|
|
|
|
|
|
=head1 LICENSE |
1249
|
|
|
|
|
|
|
|
1250
|
|
|
|
|
|
|
Copyright (C) Tokuhiro Matsuno |
1251
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
1253
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
1254
|
|
|
|
|
|
|
|
1255
|
|
|
|
|
|
|
=head1 BUGS |
1256
|
|
|
|
|
|
|
|
1257
|
|
|
|
|
|
|
Please report any bugs or feature requests on the bugtracker website |
1258
|
|
|
|
|
|
|
https://github.com/tokuhirom/Web-Query/issues |
1259
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
When submitting a bug or request, please include a test-file or a |
1261
|
|
|
|
|
|
|
patch to an existing test-file that illustrates the bug or desired |
1262
|
|
|
|
|
|
|
feature. |
1263
|
|
|
|
|
|
|
|
1264
|
|
|
|
|
|
|
=cut |
1265
|
|
|
|
|
|
|
|