File Coverage

blib/lib/NewsExtractor/JSONLDExtractor.pm
Criterion Covered Total %
statement 15 32 46.8
branch 0 6 0.0
condition 0 5 0.0
subroutine 5 10 50.0
pod 0 4 0.0
total 20 57 35.0


line stmt bran cond sub pod time code
1             use Moo;
2 1     1   6 extends 'NewsExtractor::TXExtractor';
  1         3  
  1         5  
3              
4             use Mojo::Transaction::HTTP;
5 1     1   250 use Types::Standard qw( InstanceOf HashRef ArrayRef );
  1         2  
  1         7  
6 1     1   26 use Mojo::JSON qw(from_json);
  1         2  
  1         6  
7 1     1   617 use Importer 'NewsExtractor::TextUtil' => qw(u remove_control_characters);
  1         1  
  1         45  
8 1     1   5  
  1         2  
  1         5  
9             has tx => (
10             required => 1, is => 'ro',
11             isa => InstanceOf['Mojo::Transaction::HTTP'] );
12              
13             has schema_ld => (
14             required => 0,
15             is => 'lazy',
16             isa => HashRef,
17             builder => 1,
18             );
19              
20             my ($self) = @_;
21             my $el = $self->dom->at('script[type="application/ld+json"]') or return {};
22 0     0     my $x = from_json( $el->text );
23 0 0         if (HashRef->check($x)) {
24 0           return $x;
25 0 0         }
26 0           if (ArrayRef->check($x)) {
27             return $x->[0];
28 0 0         }
29 0           return {};
30             }
31 0            
32             my ($self) = @_;
33             return remove_control_characters(u($self->schema_ld->{author}{name}));
34             }
35 0     0 0    
36 0           my ($self) = @_;
37             return remove_control_characters(u($self->schema_ld->{headline}));
38             }
39              
40 0     0 0   my ($self) = @_;
41 0           return remove_control_characters(u($self->schema_ld->{datePublished}));
42             }
43              
44             my ($self) = @_;
45 0     0 0   my $text = $self->schema_ld->{articleBody} // $self->schema_ld->{description} // '';
46 0           return remove_control_characters(u($text));
47             }
48              
49             1;