File Coverage

blib/lib/NewsExtractor/SiteSpecificExtractor/newnet_tw.pm
Criterion Covered Total %
statement 9 29 31.0
branch 0 8 0.0
condition 0 3 0.0
subroutine 3 7 42.8
pod 0 4 0.0
total 12 51 23.5


line stmt bran cond sub pod time code
1             package NewsExtractor::SiteSpecificExtractor::newnet_tw;
2 1     1   7 use utf8;
  1         3  
  1         6  
3 1     1   34 use Moo;
  1         2  
  1         6  
4             extends 'NewsExtractor::GenericExtractor';
5              
6 1     1   371 use Importer 'NewsExtractor::TextUtil' => qw( reformat_dateline );
  1         2  
  1         6  
7              
8             sub headline {
9 0     0 0   my ($self) = @_;
10 0           my $el = $self->dom->at('h4 > b');
11 0           return $el->all_text;
12             }
13              
14             sub content_text {
15 0     0 0   my ($self) = @_;
16 0           $self->dom->find('label[for="ctl00_ContentPlaceHolder1_RadioButton6"], label[for="ctl00_ContentPlaceHolder1_RadioButton5"], label[for="ctl00_ContentPlaceHolder1_RadioButton4"], label[for="ctl00_ContentPlaceHolder1_RadioButton3"], label[for="ctl00_ContentPlaceHolder1_RadioButton2"], label[for="ctl00_ContentPlaceHolder1_RadioButton1"]')->map('remove');
17 0           return $self->SUPER::content_text();
18             }
19              
20             sub dateline {
21 0     0 0   my ($self) = @_;
22              
23 0           my ($dateline, $el);
24 0 0         if ($el = $self->dom->at('b > font[color=darkred]')) {
25             # Example: 日期:2020/7/8 下午 08:52:39
26 0           $dateline = reformat_dateline( $el->all_text(), '+08:00' );
27             }
28 0           return $dateline;
29             }
30              
31             sub journalist {
32 0     0 0   my ($self) = @_;
33              
34             # .col-md-8 > div:nth-child(4) > table:nth-child(1) > tbody:nth-child(1) > tr:nth-child(1) > td:nth-child(2) > b:nth-child(1) > font:nth-child(3)
35              
36 0           my ($txt, $el);
37              
38 0 0         if ($el = $self->dom->at('b > font[color=darkred]')) {
39 0           ($txt) = $el->all_text =~ m/ (?:專題|採訪): \s* (\S+) \s* 日期/x;
40             }
41 0 0 0       if ((!$txt) && ($el = $self->dom->at('#ctl00_ContentPlaceHolder1_UpdatePanel2 a[href*="Search.aspx?report="]'))) {
42 0           $txt = $el->text;
43             }
44 0 0         unless ($txt) {
45 0           ($txt) = $self->content_text =~ m<\A〔新網記者 ( \p{Letter}+ (?:報導|特稿))〕\b>x;
46             }
47 0           return $txt;
48             }
49              
50             1;