File Coverage

blib/lib/Novel/Robot/Parser.pm
Criterion Covered Total %
statement 235 383 61.3
branch 57 136 41.9
condition 43 118 36.4
subroutine 34 59 57.6
pod 3 34 8.8
total 372 730 50.9


line stmt bran cond sub pod time code
1             # ABSTRACT: get novel / bbs content from website
2             package Novel::Robot::Parser;
3              
4 3     3   1333 use strict;
  3         11  
  3         72  
5 3     3   12 use warnings;
  3         4  
  3         64  
6 3     3   446 use utf8;
  3         14  
  3         10  
7              
8 3     3   1113 use Novel::Robot::Browser;
  3         27  
  3         97  
9 3     3   16 use URI;
  3         5  
  3         45  
10 3     3   14 use Encode;
  3         4  
  3         186  
11 3     3   1263 use Web::Scraper;
  3         194758  
  3         29  
12 3     3   168 use HTML::TreeBuilder;
  3         6  
  3         28  
13 3     3   69 use Data::Dumper;
  3         5  
  3         4303  
14              
15             ### {{{ data
16              
17             #our $VERSION = 0.32;
18              
19             our %SITE_DOM_NAME = (
20             'bbs.jjwxc.net' => 'hjj',
21             'www.jjwxc.net' => 'jjwxc',
22             'tieba.baidu.com' => 'tieba',
23              
24             'www.bearead.com' => 'bearead',
25             'wwwj.bearead.com' => 'bearead',
26             'www.ddshu.net' => 'ddshu',
27             'www.kanunu8.com' => 'kanunu8',
28             );
29              
30             our %NULL_INDEX = (
31             url => '',
32             book => '',
33             writer => '',
34             writer_url => '',
35             item_list => [],
36              
37             intro => '',
38             series => '',
39             progress => '',
40             word_num => '',
41             );
42              
43             our %NULL_CHAPTER = (
44             content => '',
45             id => 0,
46             pid => 0,
47             time => '',
48             title => '',
49             url => '',
50             writer => '',
51             writer_say => '',
52             abstract => '',
53             word_num => '',
54             type => '',
55             );
56              
57             ### }}}
58              
59             ### init {{{
60             sub new {
61 4     4 1 1543 my ( $self, %opt ) = @_;
62              
63 4         20 $opt{site} = $self->detect_site( $opt{site} );
64              
65 4         15 my $module = "Novel::Robot::Parser::$opt{site}";
66 4         262 eval "require $module;";
67              
68 4         56 my $browser = Novel::Robot::Browser->new( %opt );
69 4         53 bless { browser => $browser, %opt }, $module;
70             }
71              
72       0 0   sub domain { }
73              
74             sub detect_domain {
75 0     0 0 0 my ( $self, $url ) = @_;
76 0 0       0 return ( $url, $url ) unless ( $url =~ /^https?:/ );
77              
78 0         0 my ( $dom ) = $url =~ m#^.*?\/\/(.+?)(?:/|$)#;
79              
80 0         0 my $base_dom = $dom;
81 0         0 $base_dom =~ s/^[^.]+\.//;
82 0 0       0 $base_dom = $base_dom =~ /\./ ? $base_dom : $dom;
83 0         0 return ( $dom, $base_dom );
84             }
85              
86             sub detect_site {
87 4     4 0 13 my ( $self, $url ) = @_;
88              
89 4 50 33     28 if ( $url and $url =~ /^https?:/ ) {
90 0         0 my ( $dom ) = $url =~ m#^.*?\/\/(.+?)/#;
91 0 0       0 return $SITE_DOM_NAME{$dom} if ( exists $SITE_DOM_NAME{$dom} );
92 0         0 return 'default';
93             }
94              
95 4   50     18 return $url // 'default';
96             }
97             ### }}}
98              
99             ### {{{ common
100 3     3 0 11 sub site_type { 'novel' }
101 0     0 0 0 sub charset { 'cp936' }
102       0 0   sub base_url { }
103              
104             sub generate_novel_url {
105 0     0 0 0 my ( $self, $index_url, @args ) = @_;
106 0         0 return ( $index_url, @args );
107             }
108              
109             ### }}}
110              
111             ### {{{ novel
112              
113             sub get_novel_info {
114 1     1 1 15 my ( $self, $url ) = @_;
115 1         5 my ( $i_url, $post_data ) = $self->generate_novel_url( $url );
116 1         10 my $c = $self->{browser}->request_url( $i_url, $post_data );
117              
118 1         23 my $r = $self->extract_elements(
119             \$c,
120             path => $self->scrape_novel(),
121             sub => $self->can( "parse_novel" ),
122             );
123 1         12 $r->{item_list} = $self->parse_item_list( \$c, $r );
124 1         8 ( $r->{item_list}, $r->{item_num} ) = $self->update_item_list( $r->{item_list}, $url );
125 1         5 return $r;
126             }
127              
128             sub get_novel_ref {
129 3     3 1 820 my ( $self, $index_url, %o ) = @_;
130              
131 3 50       14 return $self->get_tiezi_ref( $index_url, %o ) if ( $self->site_type() eq 'tiezi' );
132              
133 3         6 my ( $r, $item_list, $max_item_num );
134 3 100       16 if ( $index_url !~ /^https?:/ ) {
135 2         8 $r = $self->parse_novel( $index_url, %o );
136             } else {
137 1         4 my ( $i_url, $post_data ) = $self->generate_novel_url( $index_url );
138              
139             ( $r, $item_list ) = $self->{browser}->request_url_whole(
140             $i_url,
141             post_data => $post_data,
142             info_sub => sub {
143 1     1   19 $self->extract_elements(
144             @_,
145             path => $self->scrape_novel(),
146             sub => $self->can( "parse_novel" ),
147             );
148             },
149 1     1   11 item_list_sub => sub { $self->can( "parse_item_list" )->( $self, @_ ) },
150             item_sub => sub {
151 1     1   23 $self->extract_elements(
152             @_,
153             path => $self->scrape_novel_item(),
154             sub => $self->can( "parse_novel_item" ),
155             );
156             },
157 1         11 %o,
158             );
159              
160 1         13 $r->{url} = $index_url;
161 1   50     9 $r->{item_list} = $item_list || [];
162              
163             #$r->{item_num} = $max_item_num || undef;
164             } ## end else [ if ( $index_url !~ /^https?:/)]
165              
166 3         18 ( $r->{item_list}, $r->{item_num} ) = $self->update_item_list( $r->{item_list}, $index_url );
167 3         23 $self->filter_item_list( $r, %o );
168 3         15 $r->{writer_url} = $self->format_abs_url( $r->{writer_url}, $index_url );
169              
170 3         433 for my $k ( qw/writer book/ ) {
171 6 50       15 $r->{$k} = $o{$k} if ( exists $o{$k} );
172             }
173 3   66     61 $r->{$_} ||= $NULL_INDEX{$_} for keys( %NULL_INDEX );
174 3         18 $r->{$_} = $self->tidy_string( $r->{$_} ) for qw/writer book/;
175              
176 3         14 return $r;
177             } ## end sub get_novel_ref
178              
179             sub scrape_novel {
180 2     2 0 8 my ( $self ) = @_;
181 2         5 my $r = {};
182 2 50       9 push @{$r->{book}}, { path => $self->{book_path} } if ( exists $self->{book_path} );
  0         0  
183 2 50       13 push @{$r->{book}}, { regex => $self->{book_regex} } if ( exists $self->{book_regex} );
  0         0  
184 2 50       8 push @{$r->{writer}}, { path => $self->{writer_path} } if ( exists $self->{writer_path} );
  0         0  
185 2 50       11 push @{$r->{writer}}, { regex => $self->{writer_regex} } if ( exists $self->{writer_regex} );
  0         0  
186 2         51 return $r;
187             }
188              
189             sub parse_novel {
190 0     0 0 0 my ( $self, $h, $r ) = @_;
191              
192 0   0     0 $r->{book} ||= $self->scrape_element_try(
193             $h,
194             [ { path => '//meta[@name="og:novel:book_name"]', extract => '@content' },
195             { path => '//meta[@property="og:novel:book_name"]', extract => '@content' },
196             { path => '//meta[@property="og:title"]', extract => '@content' },
197             { path => '//div[@id="title"]', },
198             { path => '//div[@class="title"]', },
199             { regex => qr#[^<]+?([^,<]+?)全文阅读,#si, }, </td> </tr> <tr> <td class="h" > <a name="200">200</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> { regex => qr#<title>[^<]+?《([^,<]+?)》#si, }, </td> </tr> <tr> <td class="h" > <a name="201">201</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> { regex => qr#<title>[^<]+?,([^,<]+?)最新章节#si, }, </td> </tr> <tr> <td class="h" > <a name="202">202</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> { path => '//h1', }, </td> </tr> <tr> <td class="h" > <a name="203">203</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> { path => '//h2', }, </td> </tr> <tr> <td class="h" > <a name="204">204</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> ], </td> </tr> <tr> <td class="h" > <a name="205">205</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub => $self->can( "tidy_writer_book" ), </td> </tr> <tr> <td class="h" > <a name="206">206</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> ); </td> </tr> <tr> <td class="h" > <a name="207">207</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="208">208</a> </td> <td class="c0" > <a href="#229"> 0 </a> </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#208-1"> 0 </a> </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> $r->{writer} ||= $self->scrape_element_try( </td> </tr> <tr> <td class="h" > <a name="209">209</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $h, </td> </tr> <tr> <td class="h" > <a name="210">210</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> [ { path => '//meta[@name="author"]', extract => '@content' }, </td> </tr> <tr> <td class="h" > <a name="211">211</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="212">212</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> { path => '//meta[@name="og:novel:author"]', extract => '@content' }, </td> </tr> <tr> <td class="h" > <a name="213">213</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> { path => '//meta[@property="og:novel:author"]', extract => '@content' }, </td> </tr> <tr> <td class="h" > <a name="214">214</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> { path => '//*[@class="author"]', }, </td> </tr> <tr> <td class="h" > <a name="215">215</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> { path => '//*[@class="writer"]', }, </td> </tr> <tr> <td class="h" > <a name="216">216</a> </td> <td class="c3" > 3 </td> <td >   </td> <td >   </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#216-1"> 3 </a> </td> <td >   </td> <td > 36 </td> <td class="s"> { regex => qr#<span>作者:</span>([^<]+)#si, }, </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c3" > 3 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 6 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c3" > 3 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 53 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="217">217</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> { regex => qr#作者:<span>([^<]+)</span>#si, }, </td> </tr> <tr> <td class="h" > <a name="218">218</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> { regex => qr#<(?:em|i|h3|h2|span)>作者:([^<]+)</(?:em|i|h3|h2|span)>#si, }, </td> </tr> <tr> <td class="h" > <a name="219">219</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> { regex => qr#作者:(?:<span>)?<a[^>]*>([^<]+)</a>#si, }, </td> </tr> <tr> <td class="h" > <a name="220">220</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> { regex => qr#<p>作(?: |\s)*者:([^<]+)</p>#si, }, </td> </tr> <tr> <td class="h" > <a name="221">221</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> { regex => qr#作者:([^<]+?) 发布时间:#s, }, </td> </tr> <tr> <td class="h" > <a name="222">222</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> { regex => qr#content="([^"]+?)最新著作#s, }, </td> </tr> <tr> <td class="h" > <a name="223">223</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> { regex => qr#<title>[^<,]+?最新章节\(([^<,]+?)\),#si, }, </td> </tr> <tr> <td class="h" > <a name="224">224</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> { regex => qr#content="[^"]+?,([^",]+?)作品#s, }, </td> </tr> <tr> <td class="h" > <a name="225">225</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> ], </td> </tr> <tr> <td class="h" > <a name="226">226</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub => $self->can( "tidy_writer_book" ), </td> </tr> <tr> <td class="h" > <a name="227">227</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> ); </td> </tr> <tr> <td class="h" > <a name="228">228</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="229">229</a> </td> <td class="c0" > <a href="#231"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> $r->{$_} = $self->tidy_writer_book( $r->{$_} ) for qw/writer book title/; </td> </tr> <tr> <td class="h" > <a name="230">230</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="231">231</a> </td> <td class="c0" > <a href="#252"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> return $r; </td> </tr> <tr> <td class="h" > <a name="232">232</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } ## end sub parse_novel </td> </tr> <tr> <td class="h" > <a name="233">233</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="234">234</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub scrape_item_list { </td> </tr> <tr> <td class="h" > <a name="235">235</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#235-1"> 1 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#235-1"> 0 </a> </td> <td > 3 </td> <td class="s"> my ( $self ) = @_; </td> </tr> <tr> <td class="h" > <a name="236">236</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 2 </td> <td class="s"> my $r = {}; </td> </tr> <tr> <td class="h" > <a name="237">237</a> </td> <td class="c3" > 1 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#237-1"> 50 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 6 </td> <td class="s"> $r->{path} = $self->{item_list_path} if ( exists $self->{item_list_path} ); </td> </tr> <tr> <td class="h" > <a name="238">238</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 2 </td> <td class="s"> return $r; </td> </tr> <tr> <td class="h" > <a name="239">239</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="240">240</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="241">241</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub parse_item_list { </td> </tr> <tr> <td class="h" > <a name="242">242</a> </td> <td class="c3" > 2 </td> <td >   </td> <td >   </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#242-1"> 2 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#242-1"> 0 </a> </td> <td > 7 </td> <td class="s"> my ( $self, $h, $r ) = @_; </td> </tr> <tr> <td class="h" > <a name="243">243</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="244">244</a> </td> <td class="c3" > 2 </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#244-1"> 100 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 13 </td> <td class="s"> return $r->{item_list} if ( exists $r->{item_list} ); </td> </tr> <tr> <td class="h" > <a name="245">245</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="246">246</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 7 </td> <td class="s"> my $path_r = $self->scrape_item_list(); </td> </tr> <tr> <td class="h" > <a name="247">247</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="248">248</a> </td> <td class="c3" > 1 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#248-1"> 50 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 10 </td> <td class="s"> return $self->guess_item_list( $h ) unless ( exists $path_r->{path} ); </td> </tr> <tr> <td class="h" > <a name="249">249</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="250">250</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my $parse_novel = scraper { </td> </tr> <tr> <td class="h" > <a name="251">251</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> process $path_r->{path}, </td> </tr> <tr> <td class="h" > <a name="252">252</a> </td> <td class="c0" > <a href="#256"> 0 </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#252-1"> 0 </a> </td> <td >   </td> <td > 0 </td> <td class="s"> 'item_list[]' => { </td> </tr> <tr> <td class="h" > <a name="253">253</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> 'title' => 'TEXT', </td> </tr> <tr> <td class="h" > <a name="254">254</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> 'url' => '@href' </td> </tr> <tr> <td class="h" > <a name="255">255</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> }; </td> </tr> <tr> <td class="h" > <a name="256">256</a> </td> <td class="c0" > <a href="#257"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> }; </td> </tr> <tr> <td class="h" > <a name="257">257</a> </td> <td class="c0" > <a href="#259"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> my $ref = $parse_novel->scrape( $h ); </td> </tr> <tr> <td class="h" > <a name="258">258</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="259">259</a> </td> <td class="c0" > <a href="# "> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#259-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> my @chap = grep { exists $_->{url} and $_->{url} } @{ $ref->{item_list} }; </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c0" > <a href="# "> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c0" > <a href="#261"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="260">260</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="261">261</a> </td> <td class="c0" > <a href="#262"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#261-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> if ( $path_r->{sort} ) { </td> </tr> <tr> <td class="h" > <a name="262">262</a> </td> <td class="c0" > <a href="# "> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> @chap = sort { $a->{url} cmp $b->{url} } @chap; </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c0" > <a href="#265"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="263">263</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="264">264</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="265">265</a> </td> <td class="c0" > <a href="#330"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> return \@chap; </td> </tr> <tr> <td class="h" > <a name="266">266</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } ## end sub parse_item_list </td> </tr> <tr> <td class="h" > <a name="267">267</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="268">268</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub guess_item_list { </td> </tr> <tr> <td class="h" > <a name="269">269</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#269-1"> 1 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#269-1"> 0 </a> </td> <td > 4 </td> <td class="s"> my ( $self, $h, %opt ) = @_; </td> </tr> <tr> <td class="h" > <a name="270">270</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="271">271</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 3 </td> <td class="s"> my $new_h = $$h; </td> </tr> <tr> <td class="h" > <a name="272">272</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 65 </td> <td class="s"> $new_h=~s#<dt>[^<]+最新\d+章节</dt>.+?<dt>#<dt>#s; </td> </tr> <tr> <td class="h" > <a name="273">273</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="274">274</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 11 </td> <td class="s"> my $tree = HTML::TreeBuilder->new(); </td> </tr> <tr> <td class="h" > <a name="275">275</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 330 </td> <td class="s"> $tree->parse( $new_h ); </td> </tr> <tr> <td class="h" > <a name="276">276</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="277">277</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 27770 </td> <td class="s"> my @links = $tree->look_down( '_tag', 'a' ); </td> </tr> <tr> <td class="h" > <a name="278">278</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 873 </td> <td class="s"> @links = grep { $_->attr( 'href' ) } @links; </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c3" > 24 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 189 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="279">279</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 12 </td> <td class="s"> for my $x ( @links ) { </td> </tr> <tr> <td class="h" > <a name="280">280</a> </td> <td class="c3" > 24 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 842 </td> <td class="s"> my $up_url = $x->attr( 'href' ); </td> </tr> <tr> <td class="h" > <a name="281">281</a> </td> <td class="c3" > 24 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 257 </td> <td class="s"> $up_url =~ s#/[^/]+/?$#/#; </td> </tr> <tr> <td class="h" > <a name="282">282</a> </td> <td class="c3" > 24 </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#282-1"> 100 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 55 </td> <td class="s"> $up_url = '.' if ( $up_url !~ m#/# ); </td> </tr> <tr> <td class="h" > <a name="283">283</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="284">284</a> </td> <td class="c3" > 24 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 46 </td> <td class="s"> $x = { parent => $up_url, depth => $x->depth(), url => $x->attr( 'href' ), title => $x->as_text() }; </td> </tr> <tr> <td class="h" > <a name="285">285</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="286">286</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="287">287</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 69 </td> <td class="s"> my @out_links; </td> </tr> <tr> <td class="h" > <a name="288">288</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 3 </td> <td class="s"> my @temp_arr = ( $links[0] ); </td> </tr> <tr> <td class="h" > <a name="289">289</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 2 </td> <td class="s"> my $parent = $links[0]{parent}; </td> </tr> <tr> <td class="h" > <a name="290">290</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 3 </td> <td class="s"> my $depth = $links[0]{depth}; </td> </tr> <tr> <td class="h" > <a name="291">291</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 5 </td> <td class="s"> for ( my $i = 1 ; $i <= $#links ; $i++ ) { </td> </tr> <tr> <td class="h" > <a name="292">292</a> </td> <td class="c3" > 23 </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#292-1"> 100 </a> </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#292-1"> 100 </a> </td> <td >   </td> <td >   </td> <td > 55 </td> <td class="s"> if ( $depth == $links[$i]{depth} and $parent eq $links[$i]{parent} ) { </td> </tr> <tr> <td class="h" > <a name="293">293</a> </td> <td class="c3" > 8 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 15 </td> <td class="s"> push @temp_arr, $links[$i]; </td> </tr> <tr> <td class="h" > <a name="294">294</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } else { </td> </tr> <tr> <td class="h" > <a name="295">295</a> </td> <td class="c3" > 15 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 24 </td> <td class="s"> push @out_links, [@temp_arr]; </td> </tr> <tr> <td class="h" > <a name="296">296</a> </td> <td class="c3" > 15 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 19 </td> <td class="s"> @temp_arr = ( $links[$i] ); </td> </tr> <tr> <td class="h" > <a name="297">297</a> </td> <td class="c3" > 15 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 18 </td> <td class="s"> $depth = $links[$i]{depth}; </td> </tr> <tr> <td class="h" > <a name="298">298</a> </td> <td class="c3" > 15 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 33 </td> <td class="s"> $parent = $links[$i]{parent}; </td> </tr> <tr> <td class="h" > <a name="299">299</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="300">300</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="301">301</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="302">302</a> </td> <td class="c3" > 1 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#302-1"> 50 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 5 </td> <td class="s"> push @out_links, \@temp_arr if ( @temp_arr ); </td> </tr> <tr> <td class="h" > <a name="303">303</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="304">304</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 6 </td> <td class="s"> @out_links = sort { scalar( @$b ) <=> scalar( @$a ) } @out_links; </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c3" > 36 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 40 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="305">305</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="306">306</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 2 </td> <td class="s"> my $res_arr; </td> </tr> <tr> <td class="h" > <a name="307">307</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 5 </td> <td class="s"> my $title_regex = </td> </tr> <tr> <td class="h" > <a name="308">308</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> qr/引子|楔子|内容简介|正文|序言|文案|第\s*[0123456789零○〇一二三四五六七八九十百千\d]+\s*(章|节)|(^[0-9]+)/; </td> </tr> <tr> <td class="h" > <a name="309">309</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 4 </td> <td class="s"> my $chap_num_regex = qr/(^|\/)\d+(\.html)?$/; </td> </tr> <tr> <td class="h" > <a name="310">310</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 4 </td> <td class="s"> for my $arr ( @out_links ) { </td> </tr> <tr> <td class="h" > <a name="311">311</a> </td> <td class="c3" > 3 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 4 </td> <td class="s"> my $x = $arr->[0]; </td> </tr> <tr> <td class="h" > <a name="312">312</a> </td> <td class="c3" > 3 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 4 </td> <td class="s"> my $y = $arr->[1]; </td> </tr> <tr> <td class="h" > <a name="313">313</a> </td> <td class="c3" > 3 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 5 </td> <td class="s"> my $z = $arr->[-1]; </td> </tr> <tr> <td class="h" > <a name="314">314</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="315">315</a> </td> <td class="c3" > 3 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#315-1"> 50 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#315-1"> 33 </a> </td> <td >   </td> <td >   </td> <td > 17 </td> <td class="s"> $res_arr = $arr if ( $opt{chapter_url_regex} and $x->{url} =~ /$opt{chapter_url_regex}/ ); </td> </tr> <tr> <td class="h" > <a name="316">316</a> </td> <td class="c3" > 3 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#316-1"> 50 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#316-1"> 33 </a> </td> <td >   </td> <td >   </td> <td > 10 </td> <td class="s"> $res_arr = $arr if ( $opt{chapter_title_regex} and $x->{title} =~ /$opt{chapter_title_regex}/ ); </td> </tr> <tr> <td class="h" > <a name="317">317</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $res_arr = $arr </td> </tr> <tr> <td class="h" > <a name="318">318</a> </td> <td class="c3" > 3 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#318-1"> 50 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#318-1"> 33 </a> </td> <td >   </td> <td >   </td> <td > 44 </td> <td class="s"> if ( $x->{title} =~ /$title_regex/ or ( $y and $y->{title} =~ /$title_regex/ ) or ( $z and $z->{title} =~ /$title_regex/ ) ); </td> </tr> <tr> <td class="h" > <a > </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#-2"> 66 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a > </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#-3"> 33 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a > </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#-4"> 33 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="319">319</a> </td> <td class="c3" > 3 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#319-1"> 50 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#319-1"> 66 </a> </td> <td >   </td> <td >   </td> <td > 34 </td> <td class="s"> $res_arr = $arr if ( ( $x->{url} =~ /$chap_num_regex/ or $z->{url} =~ /$chap_num_regex/ ) and scalar( @$arr ) > 50 ); </td> </tr> <tr> <td class="h" > <a > </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#-2"> 66 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="320">320</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="321">321</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> #$res_arr= $arr if( ($x->{url}=~/\/?\d+$/ or $z->{url}=~/\/?\d+$/) and scalar(@$arr)>50); </td> </tr> <tr> <td class="h" > <a name="322">322</a> </td> <td class="c3" > 3 </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#322-1"> 100 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 9 </td> <td class="s"> last if ( $res_arr ); </td> </tr> <tr> <td class="h" > <a name="323">323</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="324">324</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="325">325</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> #remove not chapter url </td> </tr> <tr> <td class="h" > <a name="326">326</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 2 </td> <td class="s"> while ( 1 ) { </td> </tr> <tr> <td class="h" > <a name="327">327</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 2 </td> <td class="s"> my $x = $res_arr->[0]; </td> </tr> <tr> <td class="h" > <a name="328">328</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 18 </td> <td class="s"> my $y = $res_arr->[ int( $#$res_arr / 2 ) ]; </td> </tr> <tr> <td class="h" > <a name="329">329</a> </td> <td class="c3" > 1 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#329-1"> 50 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#329-1"> 33 </a> </td> <td >   </td> <td >   </td> <td > 56 </td> <td class="s"> if ( defined $y->{title} and $y->{title} =~ /$title_regex/ and defined $y->{url} and $y->{url} =~ /\.html$/ and $x->{url} !~ /\.html$/ ) { </td> </tr> <tr> <td class="h" > <a > </a> </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#-2"> 50 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#-2"> 33 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a > </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#-3"> 33 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a > </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#-4"> 33 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a > </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#-5"> 33 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a > </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#-6"> 33 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a > </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#-7"> 33 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a > </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#-8"> 33 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="330">330</a> </td> <td class="c0" > <a href="#332"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> shift( @$res_arr ); </td> </tr> <tr> <td class="h" > <a name="331">331</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } elsif ( defined $y->{title} and $y->{title} =~ /$title_regex/ and defined $y->{url} and $y->{url} =~ /$chap_num_regex/ and $x->{url} !~ /$chap_num_regex/ ) { </td> </tr> <tr> <td class="h" > <a name="332">332</a> </td> <td class="c0" > <a href="#343"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> shift( @$res_arr ); </td> </tr> <tr> <td class="h" > <a name="333">333</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } else { </td> </tr> <tr> <td class="h" > <a name="334">334</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 3 </td> <td class="s"> last; </td> </tr> <tr> <td class="h" > <a name="335">335</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="336">336</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="337">337</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="338">338</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> #sort chapter url </td> </tr> <tr> <td class="h" > <a name="339">339</a> </td> <td class="c3" > 1 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#339-1"> 50 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#339-1"> 33 </a> </td> <td >   </td> <td >   </td> <td > 21 </td> <td class="s"> if ( $res_arr and defined $res_arr->[0]{url} and $res_arr->[0]{url} =~ /$chap_num_regex/ ) { </td> </tr> <tr> <td class="h" > <a > </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#-2"> 33 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="340">340</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#340-1"> 3 </a> </td> <td >   </td> <td > 6 </td> <td class="s"> my $trim_sub = sub { my $s = $_[0]; $s =~ s/^.+\///; $s =~ s/\.html$//; return $s }; </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c3" > 3 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 5 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c3" > 3 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 14 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c3" > 3 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 5 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c3" > 3 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 6 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="341">341</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 3 </td> <td class="s"> my @sort_arr; </td> </tr> <tr> <td class="h" > <a name="342">342</a> </td> <td class="c3" > 1 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#342-1"> 50 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 13 </td> <td class="s"> if($opt{sort_chapter_url}){ </td> </tr> <tr> <td class="h" > <a name="343">343</a> </td> <td class="c0" > <a href="# "> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> @sort_arr = sort { $trim_sub->( $a->{url} ) <=> $trim_sub->( $b->{url} ) } grep { $_->{url} =~ /$chap_num_regex/ } @$res_arr; </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c0" > <a href="# "> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c0" > <a href="#356"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="344">344</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> }else{ </td> </tr> <tr> <td class="h" > <a name="345">345</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 10 </td> <td class="s"> @sort_arr = @$res_arr; </td> </tr> <tr> <td class="h" > <a name="346">346</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="347">347</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 5 </td> <td class="s"> my @s = map { $trim_sub->( $_->{url} ) } @sort_arr; </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c3" > 3 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 9 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="348">348</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 3 </td> <td class="s"> my $random_sort = 0; </td> </tr> <tr> <td class="h" > <a name="349">349</a> </td> <td class="c3" > 1 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 5 </td> <td class="s"> for my $i ( 0 .. $#s - 1 ) { </td> </tr> <tr> <td class="h" > <a name="350">350</a> </td> <td class="c3" > 2 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#350-1"> 50 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 8 </td> <td class="s"> $random_sort = 1 if ( $s[$i] > $s[ $i + 1 ] ); </td> </tr> <tr> <td class="h" > <a name="351">351</a> </td> <td class="c3" > 2 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#351-1"> 50 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 5 </td> <td class="s"> last if ( $random_sort ); </td> </tr> <tr> <td class="h" > <a name="352">352</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="353">353</a> </td> <td class="c3" > 1 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#353-1"> 50 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 108 </td> <td class="s"> return \@sort_arr if ( $random_sort == 0 ); </td> </tr> <tr> <td class="h" > <a name="354">354</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="355">355</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="356">356</a> </td> <td class="c0" > <a href="# "> 0 </a> </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#356-1"> 0 </a> </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> return $res_arr || []; </td> </tr> <tr> <td class="h" > <a name="357">357</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } ## end sub guess_item_list </td> </tr> <tr> <td class="h" > <a name="358">358</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="359">359</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub scrape_novel_item { </td> </tr> <tr> <td class="h" > <a name="360">360</a> </td> <td class="c3" > 2 </td> <td >   </td> <td >   </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#360-1"> 2 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#360-1"> 0 </a> </td> <td > 31 </td> <td class="s"> my ( $self ) = @_; </td> </tr> <tr> <td class="h" > <a name="361">361</a> </td> <td class="c3" > 2 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 9 </td> <td class="s"> my $r = {}; </td> </tr> <tr> <td class="h" > <a name="362">362</a> </td> <td class="c3" > 2 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#362-1"> 50 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 16 </td> <td class="s"> push @{$r->{content}}, { path => $self->{content_path}, extract => 'HTML' } if( exists $self->{content_path} ); </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c0" > <a href="# "> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="363">363</a> </td> <td class="c3" > 2 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#363-1"> 50 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 11 </td> <td class="s"> push @{$r->{content}}, { regex => $self->{content_regex} } if( exists $self->{content_regex} ); </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c0" > <a href="#372"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="364">364</a> </td> <td class="c3" > 2 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 4 </td> <td class="s"> push @{$r->{content}}, ( </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c3" > 2 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 17 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="365">365</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> { path => '//div[@class="novel_content"]' }, </td> </tr> <tr> <td class="h" > <a name="366">366</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> { path => '//div[@id="content"]' }, </td> </tr> <tr> <td class="h" > <a name="367">367</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> ); </td> </tr> <tr> <td class="h" > <a name="368">368</a> </td> <td class="c3" > 2 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 28 </td> <td class="s"> return $r; </td> </tr> <tr> <td class="h" > <a name="369">369</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="370">370</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="371">371</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub parse_novel_item { </td> </tr> <tr> <td class="h" > <a name="372">372</a> </td> <td class="c0" > <a href="#374"> 0 </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#372-1"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#372-1"> 0 </a> </td> <td > 0 </td> <td class="s"> my ( $self, $h, $r ) = @_; </td> </tr> <tr> <td class="h" > <a name="373">373</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="374">374</a> </td> <td class="c0" > <a href="#375"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#374-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> $r = $self->guess_novel_item( $h ) unless ( $r->{content} ); </td> </tr> <tr> <td class="h" > <a name="375">375</a> </td> <td class="c0" > <a href="#376"> 0 </a> </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#375-1"> 0 </a> </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> $r->{$_} ||= $NULL_CHAPTER{$_} for keys( %NULL_CHAPTER ); </td> </tr> <tr> <td class="h" > <a name="376">376</a> </td> <td class="c0" > <a href="#377"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> $r->{content} = $self->tidy_content( $r->{content} ); </td> </tr> <tr> <td class="h" > <a name="377">377</a> </td> <td class="c0" > <a href="#381"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> return $r; </td> </tr> <tr> <td class="h" > <a name="378">378</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="379">379</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="380">380</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub guess_novel_item { </td> </tr> <tr> <td class="h" > <a name="381">381</a> </td> <td class="c0" > <a href="#383"> 0 </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#381-1"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#381-1"> 0 </a> </td> <td > 0 </td> <td class="s"> my ( $self, $h, %opt ) = @_; </td> </tr> <tr> <td class="h" > <a name="382">382</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="383">383</a> </td> <td class="c0" > <a href="#384"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> $$h =~ s#<!--.+?-->##sg; </td> </tr> <tr> <td class="h" > <a name="384">384</a> </td> <td class="c0" > <a href="#386"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> $$h =~ s#<script[^>]*>[^<]*</script>##sg; </td> </tr> <tr> <td class="h" > <a name="385">385</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="386">386</a> </td> <td class="c0" > <a href="#387"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> my $tree = HTML::TreeBuilder->new(); </td> </tr> <tr> <td class="h" > <a name="387">387</a> </td> <td class="c0" > <a href="#389"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> $tree->parse( $$h ); </td> </tr> <tr> <td class="h" > <a name="388">388</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="389">389</a> </td> <td class="c0" > <a href="#390"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> my @links = $tree->look_down( 'text', undef ); </td> </tr> <tr> <td class="h" > <a name="390">390</a> </td> <td class="c0" > <a href="#391"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> for my $x ( @links ) { </td> </tr> <tr> <td class="h" > <a name="391">391</a> </td> <td class="c0" > <a href="#392"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> $x = { content => $x->as_HTML( '<>&' ) }; </td> </tr> <tr> <td class="h" > <a name="392">392</a> </td> <td class="c0" > <a href="#394"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> $self->calc_content_wordnum( $x ); </td> </tr> <tr> <td class="h" > <a name="393">393</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="394">394</a> </td> <td class="c0" > <a href="# "> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> my @out_links = sort { $b->{word_num} <=> $a->{word_num} } @links; </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c0" > <a href="#396"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="395">395</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="396">396</a> </td> <td class="c0" > <a href="#397"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> my $no_next_r; </td> </tr> <tr> <td class="h" > <a name="397">397</a> </td> <td class="c0" > <a href="#398"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> for my $r ( @out_links ) { </td> </tr> <tr> <td class="h" > <a name="398">398</a> </td> <td class="c0" > <a href="#399"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#398-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> next if ( $r->{content} =~ m#</(style|head|body|html)>#s ); </td> </tr> <tr> <td class="h" > <a name="399">399</a> </td> <td class="c0" > <a href="#400"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#399-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> next if ( $r->{content} =~ m#^\s*<div id="footer">#s ); </td> </tr> <tr> <td class="h" > <a name="400">400</a> </td> <td class="c0" > <a href="#401"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#400-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> next if ( $r->{content} =~ /(上|下)一(章|页|篇)/s ); </td> </tr> <tr> <td class="h" > <a name="401">401</a> </td> <td class="c0" > <a href="#402"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#401-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> next if ( $r->{content} =~ m#</h(2|1)>#s ); </td> </tr> <tr> <td class="h" > <a name="402">402</a> </td> <td class="c0" > <a href="#403"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#402-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> next if ( $r->{content} =~ m#All rights reserved#s ); </td> </tr> <tr> <td class="h" > <a name="403">403</a> </td> <td class="c0" > <a href="#405"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#403-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> next if ( $r->{content} =~ m#(.+?</a>){5,}#s ); </td> </tr> <tr> <td class="h" > <a name="404">404</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="405">405</a> </td> <td class="c0" > <a href="#406"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> $no_next_r = $r; </td> </tr> <tr> <td class="h" > <a name="406">406</a> </td> <td class="c0" > <a href="#410"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> last; </td> </tr> <tr> <td class="h" > <a name="407">407</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="408">408</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="409">409</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> #my @grep_next_r = grep { $_->{content} =~ /(上|下)一(章|页|篇)\w{0,20}$/s and $_->{word_num} > 50 } @out_links; </td> </tr> <tr> <td class="h" > <a name="410">410</a> </td> <td class="c0" > <a href="#411"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> my @grep_next_r = grep { $_->{content} =~ /(上|下)一(章|页|篇)/s </td> </tr> <tr> <td class="h" > <a name="411">411</a> </td> <td class="c0" > <a href="#414"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#411-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> and $_->{word_num} > 50 </td> </tr> <tr> <td class="h" > <a name="412">412</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } @out_links; </td> </tr> <tr> <td class="h" > <a name="413">413</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="414">414</a> </td> <td class="c0" > <a href="#415"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> my $cc = $no_next_r->{content}; </td> </tr> <tr> <td class="h" > <a name="415">415</a> </td> <td class="c0" > <a href="#416"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> my $cc_n = $cc =~ s/(\n|<p[^>]*>|<br[^>]*>)//sg; </td> </tr> <tr> <td class="h" > <a name="416">416</a> </td> <td class="c0" > <a href="#418"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#416-1"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#416-1"> 0 </a> </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> return $no_next_r if ( ( $cc_n > 5 and $no_next_r->{word_num} > 50) or !@grep_next_r ); </td> </tr> <tr> <td class="h" > <a > </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#-2"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="417">417</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="418">418</a> </td> <td class="c0" > <a href="#426"> 0 </a> </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#418-1"> 0 </a> </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> return $grep_next_r[-1] || {}; </td> </tr> <tr> <td class="h" > <a name="419">419</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } ## end sub guess_novel_item </td> </tr> <tr> <td class="h" > <a name="420">420</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="421">421</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> ### }}} </td> </tr> <tr> <td class="h" > <a name="422">422</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="423">423</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> ### {{{ tiezi </td> </tr> <tr> <td class="h" > <a name="424">424</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="425">425</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub get_tiezi_ref { </td> </tr> <tr> <td class="h" > <a name="426">426</a> </td> <td class="c0" > <a href="#428"> 0 </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#426-1"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#426-1"> 0 </a> </td> <td > 0 </td> <td class="s"> my ( $self, $url, %o ) = @_; </td> </tr> <tr> <td class="h" > <a name="427">427</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="428">428</a> </td> <td class="c0" > <a href="#434"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> my $class = 'novel'; </td> </tr> <tr> <td class="h" > <a name="429">429</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my ( $topic, $item_list ) = $self->{browser}->request_url_whole( </td> </tr> <tr> <td class="h" > <a name="430">430</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $url, </td> </tr> <tr> <td class="h" > <a name="431">431</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="432">432</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> #post_data => $o{post_data}, </td> </tr> <tr> <td class="h" > <a name="433">433</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> info_sub => sub { </td> </tr> <tr> <td class="h" > <a name="434">434</a> </td> <td class="c0" > <a href="#440"> 0 </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#434-1"> 0 </a> </td> <td >   </td> <td > 0 </td> <td class="s"> $self->extract_elements( </td> </tr> <tr> <td class="h" > <a name="435">435</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> @_, </td> </tr> <tr> <td class="h" > <a name="436">436</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> path => $self->can( "scrape_$class" )->( $self ), </td> </tr> <tr> <td class="h" > <a name="437">437</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub => $self->can( "parse_$class" ), </td> </tr> <tr> <td class="h" > <a name="438">438</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> ); </td> </tr> <tr> <td class="h" > <a name="439">439</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> }, </td> </tr> <tr> <td class="h" > <a name="440">440</a> </td> <td class="c0" > <a href="#441"> 0 </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#440-1"> 0 </a> </td> <td >   </td> <td > 0 </td> <td class="s"> item_list_sub => sub { $self->can( "parse_${class}_item" )->( $self, @_ ) }, </td> </tr> <tr> <td class="h" > <a name="441">441</a> </td> <td class="c0" > <a href="#446"> 0 </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#441-1"> 0 </a> </td> <td >   </td> <td > 0 </td> <td class="s"> page_list_sub => sub { $self->can( "parse_${class}_list" )->( $self, @_ ) }, </td> </tr> <tr> <td class="h" > <a name="442">442</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="443">443</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> #min_page_num => $o{"min_page_num"}, </td> </tr> <tr> <td class="h" > <a name="444">444</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> #max_page_num => $o{"max_page_num"}, </td> </tr> <tr> <td class="h" > <a name="445">445</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> stop_sub => sub { </td> </tr> <tr> <td class="h" > <a name="446">446</a> </td> <td class="c0" > <a href="#447"> 0 </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#446-1"> 0 </a> </td> <td >   </td> <td > 0 </td> <td class="s"> my ( $info, $data_list, $i ) = @_; </td> </tr> <tr> <td class="h" > <a name="447">447</a> </td> <td class="c0" > <a href="#449"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> $self->{browser}->is_list_overflow( $data_list, $o{"max_item_num"} ); </td> </tr> <tr> <td class="h" > <a name="448">448</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> }, </td> </tr> <tr> <td class="h" > <a name="449">449</a> </td> <td class="c0" > <a href="#452"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> %o, </td> </tr> <tr> <td class="h" > <a name="450">450</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> ); </td> </tr> <tr> <td class="h" > <a name="451">451</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="452">452</a> </td> <td class="c0" > <a href="#454"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> $item_list = $self->update_item_list( $item_list, $url ); </td> </tr> <tr> <td class="h" > <a name="453">453</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="454">454</a> </td> <td class="c0" > <a href="#459"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#454-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> unshift @$item_list, $topic if ( $topic->{content} ); </td> </tr> <tr> <td class="h" > <a name="455">455</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my %r = ( </td> </tr> <tr> <td class="h" > <a name="456">456</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> %$topic, </td> </tr> <tr> <td class="h" > <a name="457">457</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> writer => $o{writer} || $topic->{writer}, </td> </tr> <tr> <td class="h" > <a name="458">458</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> book => $o{book} || $topic->{book} || $topic->{title}, </td> </tr> <tr> <td class="h" > <a name="459">459</a> </td> <td class="c0" > <a href="#462"> 0 </a> </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#459-1"> 0 </a> </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> url => $url, </td> </tr> <tr> <td class="h" > <a > </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#-2"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="460">460</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> item_list => $item_list, </td> </tr> <tr> <td class="h" > <a name="461">461</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> ); </td> </tr> <tr> <td class="h" > <a name="462">462</a> </td> <td class="c0" > <a href="#464"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> $self->filter_item_list( \%r, %o ); </td> </tr> <tr> <td class="h" > <a name="463">463</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="464">464</a> </td> <td class="c0" > <a href="#471"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> return \%r; </td> </tr> <tr> <td class="h" > <a name="465">465</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } ## end sub get_tiezi_ref </td> </tr> <tr> <td class="h" > <a name="466">466</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="467">467</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> ### }}} </td> </tr> <tr> <td class="h" > <a name="468">468</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="469">469</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> ### {{{ iterate_ref </td> </tr> <tr> <td class="h" > <a name="470">470</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub get_iterate_ref { </td> </tr> <tr> <td class="h" > <a name="471">471</a> </td> <td class="c0" > <a href="#479"> 0 </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#471-1"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#471-1"> 0 </a> </td> <td > 0 </td> <td class="s"> my ( $self, $url, %o ) = @_; </td> </tr> <tr> <td class="h" > <a name="472">472</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my ( $info, $item_list ) = $self->{browser}->request_url_whole( </td> </tr> <tr> <td class="h" > <a name="473">473</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $url, </td> </tr> <tr> <td class="h" > <a name="474">474</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> post_data => $o{post_data}, </td> </tr> <tr> <td class="h" > <a name="475">475</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> info_sub => sub { </td> </tr> <tr> <td class="h" > <a name="476">476</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $self->extract_elements( </td> </tr> <tr> <td class="h" > <a name="477">477</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> @_, </td> </tr> <tr> <td class="h" > <a name="478">478</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> path => {}, </td> </tr> <tr> <td class="h" > <a name="479">479</a> </td> <td class="c0" > <a href="# "> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> sub => sub { my ( $self, $html_ref, $r ) = @_; return $r; }, </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c0" > <a href="#480"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="480">480</a> </td> <td class="c0" > <a href="#482"> 0 </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#480-1"> 0 </a> </td> <td >   </td> <td > 0 </td> <td class="s"> ); </td> </tr> <tr> <td class="h" > <a name="481">481</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> }, </td> </tr> <tr> <td class="h" > <a name="482">482</a> </td> <td class="c0" > <a href="# "> 0 </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#482-1"> 0 </a> </td> <td >   </td> <td > 0 </td> <td class="s"> item_sub => sub { my ( $self, $html_ref ) = @_; return {}; }, </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c0" > <a href="#483"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="483">483</a> </td> <td class="c0" > <a href="# "> 0 </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#483-1"> 0 </a> </td> <td >   </td> <td > 0 </td> <td class="s"> item_list_sub => sub { my ( $self, $html_ref ) = @_; return []; }, </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c0" > <a href="#488"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="484">484</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="485">485</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> #min_page_num => $o{"min_page_num"}, </td> </tr> <tr> <td class="h" > <a name="486">486</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> #max_page_num => $o{"max_page_num"}, </td> </tr> <tr> <td class="h" > <a name="487">487</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> stop_sub => sub { </td> </tr> <tr> <td class="h" > <a name="488">488</a> </td> <td class="c0" > <a href="#489"> 0 </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#488-1"> 0 </a> </td> <td >   </td> <td > 0 </td> <td class="s"> my ( $info, $data_list, $i ) = @_; </td> </tr> <tr> <td class="h" > <a name="489">489</a> </td> <td class="c0" > <a href="#491"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> $self->{browser}->is_list_overflow( $data_list, $o{"max_item_num"} ); </td> </tr> <tr> <td class="h" > <a name="490">490</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> }, </td> </tr> <tr> <td class="h" > <a name="491">491</a> </td> <td class="c0" > <a href="#494"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> %o, </td> </tr> <tr> <td class="h" > <a name="492">492</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> ); </td> </tr> <tr> <td class="h" > <a name="493">493</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="494">494</a> </td> <td class="c0" > <a href="#496"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> $info->{item_list} = $self->update_item_list( $item_list, $url ); </td> </tr> <tr> <td class="h" > <a name="495">495</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="496">496</a> </td> <td class="c0" > <a href="#519"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> return $info; </td> </tr> <tr> <td class="h" > <a name="497">497</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } ## end sub get_iterate_ref </td> </tr> <tr> <td class="h" > <a name="498">498</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="499">499</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> ### }}} </td> </tr> <tr> <td class="h" > <a name="500">500</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="501">501</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> ### {{{ base </td> </tr> <tr> <td class="h" > <a name="502">502</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="503">503</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub update_item_list { </td> </tr> <tr> <td class="h" > <a name="504">504</a> </td> <td class="c3" > 6 </td> <td >   </td> <td >   </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#504-1"> 6 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#504-1"> 0 </a> </td> <td > 15 </td> <td class="s"> my ( $self, $arr, $base_url ) = @_; </td> </tr> <tr> <td class="h" > <a name="505">505</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="506">506</a> </td> <td class="c3" > 6 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 9 </td> <td class="s"> my %rem; </td> </tr> <tr> <td class="h" > <a name="507">507</a> </td> <td class="c3" > 6 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 12 </td> <td class="s"> for my $chap (@$arr){ </td> </tr> <tr> <td class="h" > <a name="508">508</a> </td> <td class="c3" > 25 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#508-1"> 50 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#508-1"> 0 </a> </td> <td >   </td> <td >   </td> <td > 55 </td> <td class="s"> $chap = { url => $chap || '' } if ( ref( $chap ) ne 'HASH' ); </td> </tr> <tr> <td class="h" > <a name="509">509</a> </td> <td class="c3" > 25 </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#509-1"> 100 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 51 </td> <td class="s"> if ( $chap->{url} ) { </td> </tr> <tr> <td class="h" > <a name="510">510</a> </td> <td class="c3" > 13 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 58 </td> <td class="s"> $chap->{url} = $self->format_abs_url( $chap->{url}, $base_url ); </td> </tr> <tr> <td class="h" > <a name="511">511</a> </td> <td class="c3" > 13 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 8503 </td> <td class="s"> $rem{ $chap->{url} }++; </td> </tr> <tr> <td class="h" > <a name="512">512</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="513">513</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="514">514</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="515">515</a> </td> <td class="c3" > 6 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 11 </td> <td class="s"> my $i = 0; </td> </tr> <tr> <td class="h" > <a name="516">516</a> </td> <td class="c3" > 6 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 11 </td> <td class="s"> my @res; </td> </tr> <tr> <td class="h" > <a name="517">517</a> </td> <td class="c3" > 6 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 11 </td> <td class="s"> for my $chap ( @$arr ) { </td> </tr> <tr> <td class="h" > <a name="518">518</a> </td> <td class="c3" > 25 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#518-1"> 50 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#518-1"> 66 </a> </td> <td >   </td> <td >   </td> <td > 72 </td> <td class="s"> if($chap->{url} and $rem{ $chap->{url} }>1){ </td> </tr> <tr> <td class="h" > <a name="519">519</a> </td> <td class="c0" > <a href="#530"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> $rem{$chap->{url}}--; </td> </tr> <tr> <td class="h" > <a name="520">520</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> }else{ </td> </tr> <tr> <td class="h" > <a name="521">521</a> </td> <td class="c3" > 25 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 29 </td> <td class="s"> ++$i; </td> </tr> <tr> <td class="h" > <a name="522">522</a> </td> <td class="c3" > 25 </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#522-1"> 66 </a> </td> <td >   </td> <td >   </td> <td > 67 </td> <td class="s"> $chap->{pid} //= $i; #page id </td> </tr> <tr> <td class="h" > <a name="523">523</a> </td> <td class="c3" > 25 </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#523-1"> 66 </a> </td> <td >   </td> <td >   </td> <td > 50 </td> <td class="s"> $chap->{id} //= $i; #item id </td> </tr> <tr> <td class="h" > <a name="524">524</a> </td> <td class="c3" > 25 </td> <td >   </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#524-1"> 100 </a> </td> <td >   </td> <td >   </td> <td > 58 </td> <td class="s"> $chap->{content} //= ''; </td> </tr> <tr> <td class="h" > <a name="525">525</a> </td> <td class="c3" > 25 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#525-1"> 50 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 121 </td> <td class="s"> push @res, $chap unless($chap->{content}=~m#正在手打中#s); </td> </tr> <tr> <td class="h" > <a name="526">526</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="527">527</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="528">528</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="529">529</a> </td> <td class="c3" > 6 </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#529-1"> 33 </a> </td> <td >   </td> <td >   </td> <td > 34 </td> <td class="s"> while(@res and $res[-1]{content}=~m#正在手打中#s ){ </td> </tr> <tr> <td class="h" > <a name="530">530</a> </td> <td class="c0" > <a href="#562"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> pop @res; </td> </tr> <tr> <td class="h" > <a name="531">531</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="532">532</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="533">533</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> #$i = $arr->[-1]{id} if ( $#$arr >= 0 and exists $arr->[-1]{id} and $arr->[-1]{id} > $i ); </td> </tr> <tr> <td class="h" > <a name="534">534</a> </td> <td class="c3" > 6 </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#534-1"> 100 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 31 </td> <td class="s"> return wantarray ? ( \@res, $i ) : \@res; </td> </tr> <tr> <td class="h" > <a name="535">535</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } ## end sub update_item_list </td> </tr> <tr> <td class="h" > <a name="536">536</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="537">537</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub format_abs_url { </td> </tr> <tr> <td class="h" > <a name="538">538</a> </td> <td class="c3" > 16 </td> <td >   </td> <td >   </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#538-1"> 16 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#538-1"> 0 </a> </td> <td > 33 </td> <td class="s"> my ( $self, $url, $base_url ) = @_; </td> </tr> <tr> <td class="h" > <a name="539">539</a> </td> <td class="c3" > 16 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#539-1"> 50 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 29 </td> <td class="s"> return $url unless ( $base_url ); </td> </tr> <tr> <td class="h" > <a name="540">540</a> </td> <td class="c3" > 16 </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#540-1"> 100 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 53 </td> <td class="s"> return $url unless ( $base_url =~ /^https?:/ ); </td> </tr> <tr> <td class="h" > <a name="541">541</a> </td> <td class="c3" > 14 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 48 </td> <td class="s"> my $abs_url = URI->new_abs( $url, $base_url )->as_string; </td> </tr> <tr> <td class="h" > <a name="542">542</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="543">543</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="544">544</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub extract_elements { </td> </tr> <tr> <td class="h" > <a name="545">545</a> </td> <td class="c3" > 4 </td> <td >   </td> <td >   </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#545-1"> 4 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#545-1"> 0 </a> </td> <td > 27 </td> <td class="s"> my ( $self, $h, %o ) = @_; </td> </tr> <tr> <td class="h" > <a name="546">546</a> </td> <td class="c3" > 4 </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#546-1"> 50 </a> </td> <td >   </td> <td >   </td> <td > 18 </td> <td class="s"> $o{path} ||= {}; </td> </tr> <tr> <td class="h" > <a name="547">547</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="548">548</a> </td> <td class="c3" > 4 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 13 </td> <td class="s"> my $r = {}; </td> </tr> <tr> <td class="h" > <a name="549">549</a> </td> <td class="c3" > 4 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 8 </td> <td class="s"> while ( my ( $xk, $xr ) = each %{ $o{path} } ) { </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c3" > 6 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 40 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="550">550</a> </td> <td class="c3" > 2 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 17 </td> <td class="s"> $r->{$xk} = $self->scrape_element_try( $h, $xr ); </td> </tr> <tr> <td class="h" > <a name="551">551</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="552">552</a> </td> <td class="c3" > 4 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#552-1"> 50 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 31 </td> <td class="s"> $r = $o{sub}->( $self, $h, $r ) if ( $o{sub} ); </td> </tr> <tr> <td class="h" > <a name="553">553</a> </td> <td class="c3" > 4 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 21 </td> <td class="s"> return $r; </td> </tr> <tr> <td class="h" > <a name="554">554</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="555">555</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="556">556</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub scrape_element_try { </td> </tr> <tr> <td class="h" > <a name="557">557</a> </td> <td class="c3" > 2 </td> <td >   </td> <td >   </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#557-1"> 2 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#557-1"> 0 </a> </td> <td > 8 </td> <td class="s"> my ( $self, $h, $r_list, %o ) = @_; </td> </tr> <tr> <td class="h" > <a name="558">558</a> </td> <td class="c3" > 2 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 6 </td> <td class="s"> my $c; </td> </tr> <tr> <td class="h" > <a name="559">559</a> </td> <td class="c3" > 2 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 10 </td> <td class="s"> for my $path_or_regex ( @$r_list ) { </td> </tr> <tr> <td class="h" > <a name="560">560</a> </td> <td class="c3" > 4 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 20 </td> <td class="s"> $c = $self->scrape_element( $h, $path_or_regex ); </td> </tr> <tr> <td class="h" > <a name="561">561</a> </td> <td class="c3" > 4 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#561-1"> 50 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 16 </td> <td class="s"> next unless ( $c ); </td> </tr> <tr> <td class="h" > <a name="562">562</a> </td> <td class="c0" > <a href="#563"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#562-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> $c = $o{sub}->( $self, $c ) if ( exists $o{sub} ); </td> </tr> <tr> <td class="h" > <a name="563">563</a> </td> <td class="c0" > <a href="#564"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#563-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> next unless ( $c ); </td> </tr> <tr> <td class="h" > <a name="564">564</a> </td> <td class="c0" > <a href="#577"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> return $c; </td> </tr> <tr> <td class="h" > <a name="565">565</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="566">566</a> </td> <td class="c3" > 2 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 12 </td> <td class="s"> return; </td> </tr> <tr> <td class="h" > <a name="567">567</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="568">568</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="569">569</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub scrape_element { </td> </tr> <tr> <td class="h" > <a name="570">570</a> </td> <td class="c3" > 4 </td> <td >   </td> <td >   </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#570-1"> 4 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#570-1"> 0 </a> </td> <td > 10 </td> <td class="s"> my ( $self, $h, $o ) = @_; </td> </tr> <tr> <td class="h" > <a name="571">571</a> </td> <td class="c3" > 4 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#571-1"> 50 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 15 </td> <td class="s"> return $self->extract_regex_element( $h, $o->{regex} ) if ( $o->{regex} ); </td> </tr> <tr> <td class="h" > <a name="572">572</a> </td> <td class="c3" > 4 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#572-1"> 50 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 14 </td> <td class="s"> return $o->{sub}->( $h ) unless ( $o->{path} ); </td> </tr> <tr> <td class="h" > <a name="573">573</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="574">574</a> </td> <td class="c3" > 4 </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#574-1"> 50 </a> </td> <td >   </td> <td >   </td> <td > 25 </td> <td class="s"> $o->{extract} ||= 'TEXT'; </td> </tr> <tr> <td class="h" > <a name="575">575</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="576">576</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my $parse = $o->{is_list} </td> </tr> <tr> <td class="h" > <a name="577">577</a> </td> <td class="c0" > <a href="#582"> 0 </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#577-1"> 0 </a> </td> <td >   </td> <td > 0 </td> <td class="s"> ? scraper { process $o->{path}, 'data[]' => $o->{extract}; } </td> </tr> <tr> <td class="h" > <a name="578">578</a> </td> <td class="c3" > 4 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#578-1"> 50 </a> </td> <td >   </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#578-1"> 4 </a> </td> <td >   </td> <td > 43 </td> <td class="s"> : scraper { process_first $o->{path}, 'data' => $o->{extract}; }; </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c3" > 4 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 185617 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="579">579</a> </td> <td class="c3" > 4 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 55 </td> <td class="s"> my $r = $parse->scrape( $h ); </td> </tr> <tr> <td class="h" > <a name="580">580</a> </td> <td class="c3" > 4 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#580-1"> 50 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 168260 </td> <td class="s"> return unless ( defined $r->{data} ); </td> </tr> <tr> <td class="h" > <a name="581">581</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="582">582</a> </td> <td class="c0" > <a href="#583"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#582-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> return $r->{data} unless ( $o->{sub} ); </td> </tr> <tr> <td class="h" > <a name="583">583</a> </td> <td class="c0" > <a href="#587"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> return $o->{sub}->( $r->{data} ); </td> </tr> <tr> <td class="h" > <a name="584">584</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="585">585</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="586">586</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub extract_regex_element { </td> </tr> <tr> <td class="h" > <a name="587">587</a> </td> <td class="c0" > <a href="#588"> 0 </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#587-1"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#587-1"> 0 </a> </td> <td > 0 </td> <td class="s"> my ( $self, $h, $reg ) = @_; </td> </tr> <tr> <td class="h" > <a name="588">588</a> </td> <td class="c0" > <a href="#589"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> my ( $d ) = $$h =~ m#$reg#s; </td> </tr> <tr> <td class="h" > <a name="589">589</a> </td> <td class="c0" > <a href="#605"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> return $d; </td> </tr> <tr> <td class="h" > <a name="590">590</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="591">591</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="592">592</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub filter_item_list { </td> </tr> <tr> <td class="h" > <a name="593">593</a> </td> <td class="c3" > 3 </td> <td >   </td> <td >   </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#593-1"> 3 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#593-1"> 0 </a> </td> <td > 8 </td> <td class="s"> my ( $self, $r, %o ) = @_; </td> </tr> <tr> <td class="h" > <a name="594">594</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="595">595</a> </td> <td class="c3" > 3 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 8 </td> <td class="s"> my $flist = $r->{item_list}; </td> </tr> <tr> <td class="h" > <a name="596">596</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="597">597</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> #$r->{item_num} //= $flist->[-1]{id} // scalar( @$flist ); </td> </tr> <tr> <td class="h" > <a name="598">598</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="599">599</a> </td> <td class="c3" > 3 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 25 </td> <td class="s"> $flist->[$_]{content} = $self->tidy_content( $flist->[$_]{content} ) for ( 0 .. $#$flist ); </td> </tr> <tr> <td class="h" > <a name="600">600</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="601">601</a> </td> <td class="c3" > 3 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 9 </td> <td class="s"> $flist = [ grep { $self->{browser}->is_item_in_range( $_->{id}, $o{min_item_num}, $o{max_item_num} ) } @$flist ]; </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c3" > 9 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 35 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="602">602</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="603">603</a> </td> <td class="c3" > 3 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 18 </td> <td class="s"> $self->calc_content_wordnum( $_ ) for @$flist; </td> </tr> <tr> <td class="h" > <a name="604">604</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="605">605</a> </td> <td class="c0" > <a href="#608"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> $flist = [ grep { $_->{word_num} >= $o{min_content_word_num} } @$flist ] </td> </tr> <tr> <td class="h" > <a name="606">606</a> </td> <td class="c3" > 3 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#606-1"> 50 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 8 </td> <td class="s"> if ( $o{min_content_word_num} ); </td> </tr> <tr> <td class="h" > <a name="607">607</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="608">608</a> </td> <td class="c0" > <a href="#611"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> $flist = [ grep { $_->{writer} eq $r->{writer} } @$flist ] </td> </tr> <tr> <td class="h" > <a name="609">609</a> </td> <td class="c3" > 3 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#609-1"> 50 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 9 </td> <td class="s"> if ( $o{only_poster} ); </td> </tr> <tr> <td class="h" > <a name="610">610</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="611">611</a> </td> <td class="c0" > <a href="#614"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> $flist = [ grep { $_->{content} =~ /$o{grep_content}/s } @$flist ] </td> </tr> <tr> <td class="h" > <a name="612">612</a> </td> <td class="c3" > 3 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#612-1"> 50 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 8 </td> <td class="s"> if ( $o{grep_content} ); </td> </tr> <tr> <td class="h" > <a name="613">613</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="614">614</a> </td> <td class="c0" > <a href="#635"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> $flist = [ grep { $_->{content} !~ /$o{filter_content}/s } @$flist ] </td> </tr> <tr> <td class="h" > <a name="615">615</a> </td> <td class="c3" > 3 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#615-1"> 50 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 9 </td> <td class="s"> if ( $o{filter_content} ); </td> </tr> <tr> <td class="h" > <a name="616">616</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="617">617</a> </td> <td class="c3" > 3 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#617-1"> 50 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 5 </td> <td class="s"> $flist = [ grep { defined $_->{content} and $_->{content} =~ /\S/s } @$flist ]; </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c3" > 7 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 37 </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="618">618</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="619">619</a> </td> <td class="c3" > 3 </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#619-1"> 50 </a> </td> <td >   </td> <td >   </td> <td > 11 </td> <td class="s"> $r->{item_list} = $flist || []; </td> </tr> <tr> <td class="h" > <a name="620">620</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="621">621</a> </td> <td class="c3" > 3 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 6 </td> <td class="s"> return $self; </td> </tr> <tr> <td class="h" > <a name="622">622</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } ## end sub filter_item_list </td> </tr> <tr> <td class="h" > <a name="623">623</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="624">624</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub calc_content_wordnum { </td> </tr> <tr> <td class="h" > <a name="625">625</a> </td> <td class="c3" > 7 </td> <td >   </td> <td >   </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#625-1"> 7 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#625-1"> 0 </a> </td> <td > 13 </td> <td class="s"> my ( $self, $f ) = @_; </td> </tr> <tr> <td class="h" > <a name="626">626</a> </td> <td class="c3" > 7 </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#626-1"> 50 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 18 </td> <td class="s"> return if ( $f->{word_num} ); </td> </tr> <tr> <td class="h" > <a name="627">627</a> </td> <td class="c3" > 7 </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#627-1"> 50 </a> </td> <td >   </td> <td >   </td> <td > 16 </td> <td class="s"> my $wd = $f->{content} || ''; </td> </tr> <tr> <td class="h" > <a name="628">628</a> </td> <td class="c3" > 7 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 125 </td> <td class="s"> $wd =~ s/<[^>]+>//gs; </td> </tr> <tr> <td class="h" > <a name="629">629</a> </td> <td class="c3" > 7 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 89 </td> <td class="s"> $wd =~ s/\s+//sg; </td> </tr> <tr> <td class="h" > <a name="630">630</a> </td> <td class="c3" > 7 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 643 </td> <td class="s"> $f->{word_num} = $wd =~ s/\S//gs; </td> </tr> <tr> <td class="h" > <a name="631">631</a> </td> <td class="c3" > 7 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 16 </td> <td class="s"> return $f; </td> </tr> <tr> <td class="h" > <a name="632">632</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="633">633</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="634">634</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub tidy_writer_book { </td> </tr> <tr> <td class="h" > <a name="635">635</a> </td> <td class="c0" > <a href="#636"> 0 </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#635-1"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#635-1"> 0 </a> </td> <td > 0 </td> <td class="s"> my ( $self, $c ) = @_; </td> </tr> <tr> <td class="h" > <a name="636">636</a> </td> <td class="c0" > <a href="#637"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#636-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> return unless ( defined $c ); </td> </tr> <tr> <td class="h" > <a name="637">637</a> </td> <td class="c0" > <a href="#638"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> for ( $c ) { </td> </tr> <tr> <td class="h" > <a name="638">638</a> </td> <td class="c0" > <a href="#639"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> s/作\s*者://; </td> </tr> <tr> <td class="h" > <a name="639">639</a> </td> <td class="c0" > <a href="#640"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> s/^\s*作者-\s*//; </td> </tr> <tr> <td class="h" > <a name="640">640</a> </td> <td class="c0" > <a href="#641"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> s/小说全集//; </td> </tr> <tr> <td class="h" > <a name="641">641</a> </td> <td class="c0" > <a href="#642"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> s/作品全集//; </td> </tr> <tr> <td class="h" > <a name="642">642</a> </td> <td class="c0" > <a href="#643"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> s/专栏//; </td> </tr> <tr> <td class="h" > <a name="643">643</a> </td> <td class="c0" > <a href="#644"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> s/^.*版权属于作者([^,]+)$/$1/; </td> </tr> <tr> <td class="h" > <a name="644">644</a> </td> <td class="c0" > <a href="#645"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> s/\s*最新章节\s*$//; </td> </tr> <tr> <td class="h" > <a name="645">645</a> </td> <td class="c0" > <a href="#646"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> s/全文阅读//; </td> </tr> <tr> <td class="h" > <a name="646">646</a> </td> <td class="c0" > <a href="#647"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> s/在线阅读//; </td> </tr> <tr> <td class="h" > <a name="647">647</a> </td> <td class="c0" > <a href="#648"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> s/全集下载//; </td> </tr> <tr> <td class="h" > <a name="648">648</a> </td> <td class="c0" > <a href="#649"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> s/章节目录//; </td> </tr> <tr> <td class="h" > <a name="649">649</a> </td> <td class="c0" > <a href="#650"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> s/^\s*《(.*)》\s*$/$1/; </td> </tr> <tr> <td class="h" > <a name="650">650</a> </td> <td class="c0" > <a href="#651"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> s/^\s+|\s+$//g; </td> </tr> <tr> <td class="h" > <a name="651">651</a> </td> <td class="c0" > <a href="#653"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> s/\s+//g; </td> </tr> <tr> <td class="h" > <a name="652">652</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="653">653</a> </td> <td class="c0" > <a href="#687"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 0 </td> <td class="s"> return $c; </td> </tr> <tr> <td class="h" > <a name="654">654</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } ## end sub tidy_writer_book </td> </tr> <tr> <td class="h" > <a name="655">655</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="656">656</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub tidy_content { </td> </tr> <tr> <td class="h" > <a name="657">657</a> </td> <td class="c3" > 9 </td> <td >   </td> <td >   </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#657-1"> 9 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#657-1"> 0 </a> </td> <td > 18 </td> <td class="s"> my ( $self, $c ) = @_; </td> </tr> <tr> <td class="h" > <a name="658">658</a> </td> <td class="c3" > 9 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 15 </td> <td class="s"> for ( $c ) { </td> </tr> <tr> <td class="h" > <a name="659">659</a> </td> <td class="c3" > 9 </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#659-1"> 100 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td > 17 </td> <td class="s"> last unless ( $c ); </td> </tr> <tr> <td class="h" > <a name="660">660</a> </td> <td class="c3" > 7 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 30 </td> <td class="s"> s# ##sg; </td> </tr> <tr> <td class="h" > <a name="661">661</a> </td> <td class="c3" > 7 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 67 </td> <td class="s"> s# #\n#sg; </td> </tr> <tr> <td class="h" > <a name="662">662</a> </td> <td class="c3" > 7 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 80 </td> <td class="s"> s#\s{5,}#\n#sg; </td> </tr> <tr> <td class="h" > <a name="663">663</a> </td> <td class="c3" > 7 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 33 </td> <td class="s"> s#<script(\s+[^>]+\>|\>)[^<]*</script>##sg; </td> </tr> <tr> <td class="h" > <a name="664">664</a> </td> <td class="c3" > 7 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 295 </td> <td class="s"> s#\s*\<[^>]+?\>#\n#sg; </td> </tr> <tr> <td class="h" > <a name="665">665</a> </td> <td class="c3" > 7 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 80 </td> <td class="s"> s{\n+}{\n}sg; </td> </tr> <tr> <td class="h" > <a name="666">666</a> </td> <td class="c3" > 7 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 389 </td> <td class="s"> s{\s*(\S.*?)\s*\n}{\n<p>$1</p>}sg; </td> </tr> <tr> <td class="h" > <a name="667">667</a> </td> <td class="c3" > 7 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 39 </td> <td class="s"> s#\s+上一章\s+.+?下一章.+$##s; </td> </tr> <tr> <td class="h" > <a name="668">668</a> </td> <td class="c3" > 7 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 28 </td> <td class="s"> s#[^\n]+加入书签[^\n]+##s; </td> </tr> <tr> <td class="h" > <a name="669">669</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="670">670</a> </td> <td class="c3" > 9 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 44 </td> <td class="s"> return $c; </td> </tr> <tr> <td class="h" > <a name="671">671</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="672">672</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="673">673</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub tidy_string { </td> </tr> <tr> <td class="h" > <a name="674">674</a> </td> <td class="c3" > 6 </td> <td >   </td> <td >   </td> <td class="c3" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#674-1"> 6 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#674-1"> 0 </a> </td> <td > 14 </td> <td class="s"> my ( $self, $c ) = @_; </td> </tr> <tr> <td class="h" > <a name="675">675</a> </td> <td class="c3" > 6 </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--condition.html#675-1"> 50 </a> </td> <td >   </td> <td >   </td> <td > 12 </td> <td class="s"> $c ||= ''; </td> </tr> <tr> <td class="h" > <a name="676">676</a> </td> <td class="c3" > 6 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 11 </td> <td class="s"> for ( $c ) { </td> </tr> <tr> <td class="h" > <a name="677">677</a> </td> <td class="c3" > 6 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 18 </td> <td class="s"> s/^\s+|\s+$//gs; </td> </tr> <tr> <td class="h" > <a name="678">678</a> </td> <td class="c3" > 6 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 11 </td> <td class="s"> s/[\*\/\\\[\(\)]+//g; </td> </tr> <tr> <td class="h" > <a name="679">679</a> </td> <td class="c3" > 6 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 12 </td> <td class="s"> s/[[:punct:]]//sg; </td> </tr> <tr> <td class="h" > <a name="680">680</a> </td> <td class="c3" > 6 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 11 </td> <td class="s"> s/[\]\s+\/\\]/-/g; </td> </tr> <tr> <td class="h" > <a name="681">681</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="682">682</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="683">683</a> </td> <td class="c3" > 6 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 16 </td> <td class="s"> return $c; </td> </tr> <tr> <td class="h" > <a name="684">684</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="685">685</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="686">686</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub get_inner_html { </td> </tr> <tr> <td class="h" > <a name="687">687</a> </td> <td class="c0" > <a href="#689"> 0 </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#687-1"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#687-1"> 0 </a> </td> <td >   </td> <td class="s"> my ( $self, $h ) = @_; </td> </tr> <tr> <td class="h" > <a name="688">688</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="689">689</a> </td> <td class="c0" > <a href="#691"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--branch.html#689-1"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> return '' unless ( $h ); </td> </tr> <tr> <td class="h" > <a name="690">690</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="691">691</a> </td> <td class="c0" > <a href="#692"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my $head_i = index( $h, '>' ); </td> </tr> <tr> <td class="h" > <a name="692">692</a> </td> <td class="c0" > <a href="#694"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> substr( $h, 0, $head_i + 1 ) = ''; </td> </tr> <tr> <td class="h" > <a name="693">693</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="694">694</a> </td> <td class="c0" > <a href="#695"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my $tail_i = rindex( $h, '<' ); </td> </tr> <tr> <td class="h" > <a name="695">695</a> </td> <td class="c0" > <a href="#697"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> substr( $h, $tail_i ) = ''; </td> </tr> <tr> <td class="h" > <a name="696">696</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="697">697</a> </td> <td class="c0" > <a href="#701"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> return $h; </td> </tr> <tr> <td class="h" > <a name="698">698</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="699">699</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="700">700</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub unescape_js { </td> </tr> <tr> <td class="h" > <a name="701">701</a> </td> <td class="c0" > <a href="#702"> 0 </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#701-1"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#701-1"> 0 </a> </td> <td >   </td> <td class="s"> my ( $self, $s ) = @_; </td> </tr> <tr> <td class="h" > <a name="702">702</a> </td> <td class="c0" > <a href="# "> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $s =~ s/%u([0-9a-f]{4})/chr(hex($1))/eigs; </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c0" > <a href="#703"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="703">703</a> </td> <td class="c0" > <a href="# "> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $s =~ s/%([0-9a-f]{2})/chr(hex($1))/eigs; </td> </tr> <tr> <td class="h" > <a > </a> </td> <td class="c0" > <a href="#704"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="704">704</a> </td> <td class="c0" > <a href="#708"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> return $s; </td> </tr> <tr> <td class="h" > <a name="705">705</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="706">706</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="707">707</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> sub encode_cjk_for_url { </td> </tr> <tr> <td class="h" > <a name="708">708</a> </td> <td class="c0" > <a href="#709"> 0 </a> </td> <td >   </td> <td >   </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#708-1"> 0 </a> </td> <td class="c0" > <a href="blib-lib-Novel-Robot-Parser-pm--subroutine.html#708-1"> 0 </a> </td> <td >   </td> <td class="s"> my ( $self, $key ) = @_; </td> </tr> <tr> <td class="h" > <a name="709">709</a> </td> <td class="c0" > <a href="#710"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> my $b = uc( unpack( "H*", encode( $self->charset(), $key ) ) ); </td> </tr> <tr> <td class="h" > <a name="710">710</a> </td> <td class="c0" > <a href="#711"> 0 </a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> $b =~ s/(..)/%$1/g; </td> </tr> <tr> <td class="h" > <a name="711">711</a> </td> <td class="c0" > 0 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> return $b; </td> </tr> <tr> <td class="h" > <a name="712">712</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> } </td> </tr> <tr> <td class="h" > <a name="713">713</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="714">714</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> ### }}} </td> </tr> <tr> <td class="h" > <a name="715">715</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> <tr> <td class="h" > <a name="716">716</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> 1; </td> </tr> <tr> <td class="h" > <a name="717">717</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s">   </td> </tr> </table> </body> </html>