line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package WWW::Mixi::Scraper::Plugin::ListDiary;
|
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
726
|
use strict;
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
28
|
|
4
|
1
|
|
|
1
|
|
4
|
use warnings;
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
18
|
|
5
|
1
|
|
|
1
|
|
5
|
use WWW::Mixi::Scraper::Plugin;
|
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
5
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
validator {qw(
|
8
|
|
|
|
|
|
|
id is_number
|
9
|
|
|
|
|
|
|
page is_number
|
10
|
|
|
|
|
|
|
year is_number
|
11
|
|
|
|
|
|
|
month is_number
|
12
|
|
|
|
|
|
|
)};
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
sub scrape {
|
15
|
0
|
|
|
0
|
1
|
|
my ($self, $html) = @_;
|
16
|
|
|
|
|
|
|
|
17
|
0
|
|
|
|
|
|
my %scraper;
|
18
|
|
|
|
|
|
|
$scraper{meta} = scraper {
|
19
|
0
|
|
|
0
|
|
|
process 'a',
|
20
|
|
|
|
|
|
|
text => 'TEXT',
|
21
|
|
|
|
|
|
|
href => '@href';
|
22
|
0
|
|
|
|
|
|
result qw( text href );
|
23
|
0
|
|
|
|
|
|
};
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
$scraper{diaries} = scraper {
|
26
|
0
|
|
|
0
|
|
|
process 'div.listDiaryTitle>dl>dd',
|
27
|
|
|
|
|
|
|
time => 'TEXT';
|
28
|
0
|
|
|
|
|
|
process 'div.listDiaryTitle>dl>dt>a',
|
29
|
|
|
|
|
|
|
link => '@href',
|
30
|
|
|
|
|
|
|
subject => 'TEXT';
|
31
|
0
|
|
|
|
|
|
process 'p',
|
32
|
|
|
|
|
|
|
description => 'TEXT';
|
33
|
0
|
|
|
|
|
|
process 'div.diaryPhoto>a>img',
|
34
|
|
|
|
|
|
|
'images[]' => '@src';
|
35
|
0
|
|
|
|
|
|
process 'div.diaryEditMenu>ul>li',
|
36
|
|
|
|
|
|
|
'meta[]' => $scraper{meta};
|
37
|
0
|
|
|
|
|
|
result qw( time link subject description images meta );
|
38
|
0
|
|
|
|
|
|
};
|
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
$scraper{list} = scraper {
|
41
|
0
|
|
|
0
|
|
|
process 'div.listDiaryBlock,div.listDiaryBlockLast',
|
42
|
|
|
|
|
|
|
'diaries[]' => $scraper{diaries};
|
43
|
0
|
|
|
|
|
|
result qw( diaries );
|
44
|
0
|
|
|
|
|
|
};
|
45
|
|
|
|
|
|
|
|
46
|
0
|
|
|
|
|
|
my $stash = $self->post_process($scraper{list}->scrape(\$html));
|
47
|
|
|
|
|
|
|
|
48
|
0
|
|
|
|
|
|
foreach my $diary ( @{ $stash } ) {
|
|
0
|
|
|
|
|
|
|
49
|
0
|
|
|
|
|
|
my $meta = delete $diary->{meta};
|
50
|
0
|
0
|
|
|
|
|
foreach my $item ( @{ $meta || [] } ) {
|
|
0
|
|
|
|
|
|
|
51
|
0
|
0
|
0
|
|
|
|
if ( ($item->{href} || '') =~ /#(?:write|comment)$/ ) {
|
52
|
0
|
|
|
|
|
|
my ($count) = $item->{text} =~ /(\d+)/;
|
53
|
0
|
|
|
|
|
|
$diary->{count} = $count;
|
54
|
|
|
|
|
|
|
}
|
55
|
|
|
|
|
|
|
}
|
56
|
|
|
|
|
|
|
}
|
57
|
|
|
|
|
|
|
|
58
|
0
|
|
|
|
|
|
return $stash;
|
59
|
|
|
|
|
|
|
}
|
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
1;
|
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
__END__
|