File Coverage

blib/lib/NewsExtractor/CSSExtractor.pm
Criterion Covered Total %
statement 14 37 37.8
branch 0 8 0.0
condition n/a
subroutine 5 10 50.0
pod 0 4 0.0
total 19 59 32.2


line stmt bran cond sub pod time code
1             use v5.18;
2 1     1   13 use utf8;
  1         4  
3 1     1   5 use Moo;
  1         2  
  1         6  
4 1     1   20 extends 'NewsExtractor::TXExtractor';
  1         2  
  1         5  
5             use Importer 'NewsExtractor::TextUtil' => qw( normalize_whitespace remove_control_characters );
6 1     1   295  
  1         2  
  1         7  
7             use Types::Standard qw( InstanceOf );
8 1     1   32  
  1         2  
  1         9  
9             has css_selector => (
10             required => 1,
11             is => 'ro',
12             isa => InstanceOf['NewsExtractor::CSSRuleSet']
13             );
14              
15             my ($self, $sel) = @_;
16              
17 0     0     $self->dom->find("$sel style, $sel script")->map('remove');
18             my $txt = "". $self->dom->find( $sel )->map('all_text')->join("\n\n");
19 0           return undef if $txt eq '';
20 0            
21 0 0         $txt = normalize_whitespace(remove_control_characters($txt));
22             $txt =~ s/\s+$//;
23 0           $txt =~ s/^\s+//;
24 0           $txt =~ s/\n\n+/\n\n/g;
25 0           return $txt;
26 0           }
27 0            
28             my ($self) = @_;
29             my $ret = $self->_take($self->css_selector->headline) or return;
30             $ret =~ s/\n/ /g;
31 0     0 0   return normalize_whitespace($ret);
32 0 0         }
33 0            
34 0           my ($self) = @_;
35             my $ret = $self->_take($self->css_selector->dateline) or return;
36             $ret =~ s/\n/ /g;
37             return normalize_whitespace($ret);
38 0     0 0   }
39 0 0          
40 0           my ($self) = @_;
41 0           my $ret = $self->_take( $self->css_selector->journalist ) or return;
42             $ret =~ s/\n/ /g;
43             return normalize_whitespace($ret);
44             }
45 0     0 0    
46 0 0         my ($self) = @_;
47 0           return $self->_take( $self->css_selector->content_text );
48 0           }
49              
50             1;