File Coverage

blib/lib/HTML/ExtractText/Extra.pm
Criterion Covered Total %
statement 43 43 100.0
branch 14 18 77.7
condition n/a
subroutine 10 10 100.0
pod 3 3 100.0
total 70 74 94.5


line stmt bran cond sub pod time code
1             package HTML::ExtractText::Extra;
2              
3 1     1   20471 use strict;
  1         1  
  1         24  
4 1     1   3 use warnings;
  1         1  
  1         27  
5              
6             our $VERSION = '1.001003'; # VERSION
7              
8 1     1   412 use parent 'HTML::ExtractText';
  1         202  
  1         3  
9              
10 1     1   53412 use Devel::TakeHashArgs;
  1         2  
  1         40  
11 1     1   4 use Carp qw/croak/;
  1         1  
  1         286  
12              
13             sub new {
14 6     6 1 12196 my $class = shift;
15              
16 6 50       30 get_args_as_hash( \@_, \ my %args,
17             { # these are optional with defaults
18             whitespace => 1,
19             nbsp => 1,
20             },
21             ) or croak $@;
22              
23 6         118 my $self = $class->SUPER::new;
24              
25 6         266 $self->$_( $args{$_} ) for keys %args;
26              
27 6         22 return $self;
28             }
29              
30             sub _extract {
31 7     7   1192 my ( $self, $dom, $selector, $what ) = @_;
32              
33 7         7 my $want = $what->{ $selector };
34 7         6 my $find = $want;
35              
36 7 100       13 if ( ref $want eq 'ARRAY' ) {
37 4         6 $find = $want->[0];
38             }
39              
40 7     9   17 my @results = $dom->find( $find )->map(sub{ $self->_process })->each;
  9         1672  
41              
42 7         609 for ( @results ) {
43 9 50       13 $self->nbsp and tr/\x{00A0}/ /;
44 9 50       11 $self->whitespace and s/^\s+|\s+$//g;
45 9 100       20 if ( ref $want eq 'ARRAY' ) {
46 4 100       14 if ( ref $want->[1] eq 'Regexp' ) {
    50          
47 2         10 s/$want->[1]//g;
48             }
49             elsif ( ref $want->[1] eq 'CODE' ) {
50 2         6 $_ = $want->[1]->( $_ );
51             }
52             }
53             }
54              
55 7         25 return @results;
56             }
57              
58             sub whitespace {
59 19     19 1 30 my $self = shift;
60 19 100       25 if ( @_ ) { $self->[0]->{WHITESPACE} = shift; }
  7         10  
61 19         49 return $self->[0]->{WHITESPACE};
62             }
63              
64             sub nbsp {
65 19     19 1 18 my $self = shift;
66 19 100       32 if ( @_ ) { $self->[0]->{nbsp} = shift; }
  7         9  
67 19         49 return $self->[0]->{nbsp};
68             }
69              
70             q|
71             I called the janitor the other day to see what he could do about my
72             dingy linoleum floor. He said he would have been happy to loan me a
73             polisher, but that he hadn't the slightest idea what he had done with
74             it. I told him not to worry about it--that as a programmer
75             it wasn't the first time I had experienced a buffer
76             allocation failure due to a memory error.
77             |;
78              
79             __END__