File Coverage

blib/lib/NewsExtractor/SiteSpecificExtractor/newnet_tw.pm
Criterion Covered Total %
statement 9 29 31.0
branch 0 8 0.0
condition 0 3 0.0
subroutine 3 7 42.8
pod 0 4 0.0
total 12 51 23.5


line stmt bran cond sub pod time code
1             use utf8;
2 1     1   6 use Moo;
  1         2  
  1         5  
3 1     1   25 extends 'NewsExtractor::GenericExtractor';
  1         2  
  1         4  
4              
5             use Importer 'NewsExtractor::TextUtil' => qw( reformat_dateline );
6 1     1   331  
  1         2  
  1         5  
7             my ($self) = @_;
8             my $el = $self->dom->at('h4 > b');
9 0     0 0   return $el->all_text;
10 0           }
11 0            
12             my ($self) = @_;
13             $self->dom->find('label[for="ctl00_ContentPlaceHolder1_RadioButton6"], label[for="ctl00_ContentPlaceHolder1_RadioButton5"], label[for="ctl00_ContentPlaceHolder1_RadioButton4"], label[for="ctl00_ContentPlaceHolder1_RadioButton3"], label[for="ctl00_ContentPlaceHolder1_RadioButton2"], label[for="ctl00_ContentPlaceHolder1_RadioButton1"]')->map('remove');
14             return $self->SUPER::content_text();
15 0     0 0   }
16 0            
17 0           my ($self) = @_;
18              
19             my ($dateline, $el);
20             if ($el = $self->dom->at('b > font[color=darkred]')) {
21 0     0 0   # Example: 日期:2020/7/8 下午 08:52:39
22             $dateline = reformat_dateline( $el->all_text(), '+08:00' );
23 0           }
24 0 0         return $dateline;
25             }
26 0            
27             my ($self) = @_;
28 0            
29             # .col-md-8 > div:nth-child(4) > table:nth-child(1) > tbody:nth-child(1) > tr:nth-child(1) > td:nth-child(2) > b:nth-child(1) > font:nth-child(3)
30              
31             my ($txt, $el);
32 0     0 0    
33             if ($el = $self->dom->at('b > font[color=darkred]')) {
34             ($txt) = $el->all_text =~ m/ (?:專題|採訪): \s* (\S+) \s* 日期/x;
35             }
36 0           if ((!$txt) && ($el = $self->dom->at('#ctl00_ContentPlaceHolder1_UpdatePanel2 a[href*="Search.aspx?report="]'))) {
37             $txt = $el->text;
38 0 0         }
39 0           unless ($txt) {
40             ($txt) = $self->content_text =~ m<\A〔新網記者 ( \p{Letter}+ (?:報導|特稿))〕\b>x;
41 0 0 0       }
42 0           return $txt;
43             }
44 0 0          
45 0           1;