| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# ABSTRACT: https://www.jjwxc.net |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=pod |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=encoding utf8 |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 FUNCTION |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head2 make_query_request |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
#$type: one of 作品 (work; the default), 作者 (author), 主角 (main character), 配角 (supporting character), 其他 (other) |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
$parser->make_query_request( $keyword, query_type => $type ); |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=cut |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
package Novel::Robot::Parser::jjwxc; |
|
18
|
1
|
|
|
1
|
|
6
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
31
|
|
|
19
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
23
|
|
|
20
|
1
|
|
|
1
|
|
5
|
use utf8; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
6
|
|
|
21
|
1
|
|
|
1
|
|
23
|
use base 'Novel::Robot::Parser'; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
109
|
|
|
22
|
|
|
|
|
|
|
|
|
23
|
1
|
|
|
1
|
|
7
|
use Web::Scraper; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
12
|
|
|
24
|
1
|
|
|
1
|
|
103
|
use Encode; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
115
|
|
|
25
|
1
|
|
|
1
|
|
7
|
use Data::Dumper; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
1829
|
|
|
26
|
|
|
|
|
|
|
|
|
27
|
0
|
|
|
0
|
0
|
0
|
# base_url(): root URL of the desktop site, without a trailing slash.
sub base_url {
    return 'https://www.jjwxc.net';
}
|
28
|
|
|
|
|
|
|
|
|
29
|
0
|
|
|
0
|
0
|
0
|
# domain(): bare domain name handled by this parser.
sub domain {
    return 'jjwxc.net';
}
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# generate_novel_url( $self, $index_url )
# Rewrites an index URL that carries a "novelid=<digits>" parameter into the
# mobile-site whole-book listing URL. A URL without such a parameter (or with
# a novel id that is false, i.e. "0") is returned unchanged.
sub generate_novel_url {
    my ( $self, $index_url ) = @_;

    if ( $index_url =~ m#novelid=(\d+)# ) {
        my $novel_id = $1;
        return "https://m.jjwxc.net/book2/$novel_id?more=0&whole=1"
            if $novel_id;
    }

    return $index_url;
}
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# parse_novel( $self, $h ): builds the novel index from the page text that the
# scalar reference $h points to.
#  - Truncates $$h in place, dropping everything from the first
#    "本书霸王票读者排行" marker onwards.
#  - Captures book and writer from the "《book》writer_晋江文学城" pattern.
#  - Walks the captures in @f pairwise: the even-indexed capture is appended to
#    "https://m.jjwxc.net" to form the chapter URL, the odd-indexed capture is
#    cleaned up (leading "N." number, leading text up to the last ">", runs of
#    whitespace) and used as the chapter title.
# Returns a hashref with keys book, writer and item_list; item_list holds one
# { id, title, url } hash per chapter and is absent when no chapter matched.
# NOTE(review): the "章节列表" pattern and the pattern feeding @f look truncated
# in this listing (they appear to have lost their HTML tag text) -- confirm
# against the original module source before relying on them.
sub parse_novel { |
|
39
|
1
|
|
|
1
|
0
|
4
|
my ( $self, $h ) = @_; |
|
40
|
1
|
|
|
|
|
129
|
$$h =~ s#本书霸王票读者排行.*##s; |
|
41
|
|
|
|
|
|
|
|
|
42
|
1
|
|
|
|
|
3
|
my %r; |
|
43
|
1
|
|
|
|
|
14
|
( $r{book}, $r{writer} ) = $$h =~ m#\s*《(.+?)》(.+?)_晋江文学城#s; |
|
44
|
|
|
|
|
|
|
|
|
45
|
1
|
|
|
|
|
47
|
my ( $cc ) = $$h =~ m#章节列表: .+?(#s; |
|
46
|
1
|
|
|
|
|
48
|
my @f = $cc =~ m#(.+?)#sg; |
|
47
|
1
|
|
|
|
|
6
|
my $max_chapter_num = ( $#f + 1 ) / 2; |
|
48
|
1
|
|
|
|
|
5
|
for my $i ( 1 .. $max_chapter_num ) { |
|
49
|
10
|
|
|
|
|
19
|
my $j = 2 * $i - 1; |
|
50
|
10
|
|
|
|
|
16
|
my $t = $f[$j]; |
|
51
|
10
|
|
|
|
|
51
|
$t =~ s/^\d+\.( )*//; |
|
52
|
10
|
|
|
|
|
46
|
$t =~ s/ / /g; |
|
53
|
10
|
|
|
|
|
56
|
$t =~ s/^.+>//; |
|
54
|
10
|
|
|
|
|
57
|
$t =~ s/\s+/ /g; |
|
55
|
|
|
|
|
|
|
|
|
56
|
10
|
|
|
|
|
18
|
my $ui = 2 * $i - 2; |
|
57
|
10
|
|
|
|
|
20
|
my $u = "https://m.jjwxc.net$f[$ui]"; |
|
58
|
10
|
|
|
|
|
15
|
push @{ $r{item_list} }, { id => $i, title => $t, url => $u }; |
|
|
10
|
|
|
|
|
41
|
|
|
59
|
|
|
|
|
|
|
} |
|
60
|
|
|
|
|
|
|
|
|
61
|
1
|
|
|
|
|
9
|
return \%r; |
|
62
|
|
|
|
|
|
|
} ## end sub parse_novel |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# parse_novel_item( $self, $h ): extracts one chapter's content from the page
# text that the scalar reference $h points to.
# Returns a hashref { content => ... }; content is the single capture of the
# pattern below, or '' when the pattern does not match or captures a false value.
# NOTE(review): the pattern looks truncated in this listing (it appears to have
# lost its HTML tag text) -- confirm against the original module source.
sub parse_novel_item { |
|
65
|
1
|
|
|
1
|
0
|
4
|
my ( $self, $h ) = @_; |
|
66
|
|
|
|
|
|
|
|
|
67
|
1
|
|
|
|
|
174
|
my ( $c ) = $$h =~ m#]+>.+?]*>(.+?)#s; |
|
68
|
|
|
|
|
|
|
|
|
69
|
1
|
|
|
50
|
|
|
18
|
return { content => $c || '' }; |
|
70
|
|
|
|
|
|
|
} |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# parse_board( $self, $h )
# Scrapes the writer name from a board page: the text of the first <b> found
# under a <tr valign="bottom">. The scraped hash is passed through
# $self->tidy_string for the 'writer' field before the value is returned.
# Returns a hashref { writer => ... }.
sub parse_board {
    my ( $self, $h ) = @_;

    my $writer_scraper = scraper {
        process_first '//tr[@valign="bottom"]//b', writer => 'TEXT';
    };

    my $scraped = $writer_scraper->scrape( $h );
    $self->tidy_string( $scraped, 'writer' );

    my %board = ( writer => $scraped->{writer} );
    return \%board;
}
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
# parse_board_item( $self, $h )
# Scrapes the book list from a writer (board) page.
#
# Every <tr bgcolor="#eefaee"> row is visited in document order. A row may
# switch the current series name (see parse_writer_series_name) and/or
# describe a book (see parse_writer_book_info); only books whose URL contains
# "onebook" are kept. Books seen before any series header are filed under
# '未分类'.
#
# Returns an arrayref of book hashes (series, book, url, writer).
sub parse_board_item {
    my ( $self, $h ) = @_;

    my @book_list;
    my $series = '未分类';

    my $parse_writer = scraper {

        # The writer name is copied onto every book below, so it has to be
        # extracted here too; this is the same rule parse_board uses. Without
        # it $ref->{writer} is never populated and every book gets undef.
        process_first '//tr[@valign="bottom"]//b', writer => 'TEXT';

        process '//tr[@bgcolor="#eefaee"]', 'book_list[]' => sub {
            my $tr = $_[0];

            # Series header rows update $series for the rows that follow.
            $series = $self->parse_writer_series_name( $tr, $series );

            my $book = $self->parse_writer_book_info( $tr, $series );
            push @book_list, $book if ( $book and $book->{url} =~ /onebook/ );
        };
    };

    my $ref = $parse_writer->scrape( $h );

    $self->tidy_string( $ref, 'writer' );
    $_->{writer} = $ref->{writer} for @book_list;

    return \@book_list;
} ## end sub parse_board_item
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
# parse_writer_series_name( $self, $tr, $series )
# Returns the series name in effect after looking at table row $tr.
# A row counts as a series header only when it contains an element with
# colspan="7"; in that case the text between 【 and 】 (if any) becomes the
# new series name. In every other case $series is returned unchanged.
sub parse_writer_series_name {
    my ( $self, $tr, $series ) = @_;

    if ( $tr->look_down( 'colspan', '7' ) ) {
        my ( $header_name ) = $tr->as_trimmed_text =~ /【(.*)】/;
        return $header_name if defined $header_name;
    }

    return $series;
}
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
# parse_writer_book_info( $self, $tr, $series )
# Builds the book description for one table row of a writer page.
#
#   $tr     - row element; must support look_down(), and the elements it
#             yields must support attr() / as_trimmed_text()
#   $series - series name to record for the book
#
# Returns nothing (undef in scalar context) when the row has no <a> element,
# otherwise a hashref:
#   series => $series
#   book   => "<name>(<progress>)", with "[锁]" appended to the name when the
#             row contains an element with color="gray"
#   url    => base_url() . "/" . href of the first <a>
#
# Rows with fewer than five <td> cells yield an empty progress instead of
# dying on a method call on undef, and a missing href is treated as ''.
sub parse_writer_book_info {
    my ( $self, $tr, $series ) = @_;

    my $book = $tr->look_down( '_tag', 'a' );
    return unless ( $book );

    my $book_url = $book->attr( 'href' );
    $book_url = '' unless defined $book_url;

    my $bookname = $book->as_trimmed_text;

    # Drop the first character of the link text.
    # NOTE(review): presumably a leading marker glyph on the site -- confirm.
    substr( $bookname, 0, 1 ) = '';
    $bookname .= '[锁]' if ( $tr->look_down( 'color', 'gray' ) );

    # Progress is read from the fifth cell of the row, when there is one.
    my @cells    = $tr->look_down( '_tag', 'td' );
    my $progress = $cells[4] ? $cells[4]->as_trimmed_text : '';

    return {
        series => $series,
        book   => "$bookname($progress)",
        url    => $self->base_url() . "/$book_url",
    };
} ## end sub parse_writer_book_info
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# make_query_request( $self, $keyword, %opt )
# Builds the search URL for $keyword.
#
#   $keyword         - search text; passed through $self->encode_cjk_for_url
#   $opt{query_type} - one of 作品, 作者, 主角, 配角, 其他; a missing or false
#                      value means 作品
#
# Returns the URL string base_url() . "/search.php?kw=<keyword>&t=<code>",
# where <code> is the numeric code mapped to the query type below.
sub make_query_request {

    my ( $self, $keyword, %opt ) = @_;

    # Numeric "t" parameter for each supported query type.
    my %type_code_for = (
        '作品' => '1',
        '作者' => '2',
        '主角' => '4',
        '配角' => '5',
        '其他' => '6',
    );

    my $query_type = $opt{query_type} || '作品';
    my $type_code  = $type_code_for{$query_type};

    my $encoded_keyword = $self->encode_cjk_for_url($keyword);
    my $site            = $self->base_url();

    return $site . qq[/search.php?kw=$encoded_keyword&t=$type_code];
} ## end sub make_query_request
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# parse_query_list( $self, $h )
# Collects the result-page URLs linked from a search page.
# Only <a> elements directly under <div class="page"> whose text is a
# bracketed (possibly empty) number such as "[3]" are considered; each href is
# appended to base_url() and encoded with $self->charset().
# Returns an arrayref of encoded URLs (empty when nothing was scraped).
sub parse_query_list {
    my ( $self, $h ) = @_;

    my $page_scraper = scraper {
        process '//div[@class="page"]/a', 'urls[]' => sub {
            my ( $anchor ) = @_;

            # Skip anchors that are not numbered page links.
            return if $anchor->as_text !~ /^\[\d*\]$/;

            my $page_url = $self->base_url() . $anchor->attr( 'href' );
            return encode( $self->charset(), $page_url );
        };
    };

    my $scraped = $page_scraper->scrape( $h );
    return $scraped->{urls} ? $scraped->{urls} : [];
} ##
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# parse_query_item( $self, $h )
# Scrapes the books listed on one search-result page.
#
# Book links (<h3 class="title"><a>) and info blocks (<div class="info">) are
# scraped into two parallel lists and merged by position. Books without a URL
# are skipped.
#
# Returns an arrayref of hashes with keys book, url, writer and progress.
# The progress, when one was found, is appended to the book name as
# "name(progress)", the same shape parse_writer_book_info produces. The
# previous code appended it to a 'title' key that the scraper never fills,
# which left the book name untouched and added a stray title => "(progress)".
sub parse_query_item {
    my ( $self, $h ) = @_;

    my $parse_query = scraper {
        process '//h3[@class="title"]/a',
            'books[]' => {
                'book' => 'TEXT',
                'url'  => '@href',
            };

        process '//div[@class="info"]', 'writers[]' => sub {
            my ( $writer, $progress ) = $_[0]->as_text =~ /作者:(.+?) \┃ 进度:(\S+)/s;
            return { writer => $writer, progress => $progress };
        };
    };
    my $ref = $parse_query->scrape( $h );

    my $books   = $ref->{books}   || [];
    my $writers = $ref->{writers} || [];

    my @result;
    foreach my $i ( 0 .. $#{$books} ) {
        my $r = $books->[$i];
        next unless ( $r->{url} );

        # The info block may be missing for this position; merge an empty
        # hash rather than dereferencing undef.
        my $w = $writers->[$i] || {};

        $r->{book} .= "($w->{progress})" if defined $w->{progress};
        push @result, { %$w, %$r };
    }

    return \@result;
} ## end sub parse_query_item
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
1; |