File Coverage

blib/lib/HTML/FormatText/Html2text.pm
Criterion Covered Total %
statement 37 56 66.0
branch 1 16 6.2
condition 2 9 22.2
subroutine 11 14 78.5
pod 2 2 100.0
total 53 97 54.6


line stmt bran cond sub pod time code
1             # Copyright 2008, 2009, 2010, 2013, 2015 Kevin Ryde
2              
3             # HTML-FormatExternal is free software; you can redistribute it and/or
4             # modify it under the terms of the GNU General Public License as published
5             # by the Free Software Foundation; either version 3, or (at your option) any
6             # later version.
7             #
8             # HTML-FormatExternal is distributed in the hope that it will be useful, but
9             # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
10             # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11             # for more details.
12             #
13             # You should have received a copy of the GNU General Public License along
14             # with HTML-FormatExternal. If not, see <http://www.gnu.org/licenses/>.
15              
16             package HTML::FormatText::Html2text;
17 1     1   3621 use 5.006;
  1         4  
18 1     1   7 use strict;
  1         1  
  1         38  
19 1     1   6 use warnings;
  1         2  
  1         37  
20 1     1   5 use HTML::FormatExternal;
  1         1  
  1         9  
21             our @ISA = ('HTML::FormatExternal');
22              
23             # uncomment this to run the ### lines
24             # use Smart::Comments;
25              
26             our $VERSION = 26;
27              
28 1     1   67 use constant DEFAULT_LEFTMARGIN => 0;
  1         2  
  1         103  
29 1     1   7 use constant DEFAULT_RIGHTMARGIN => 79;
  1         2  
  1         194  
30              
31             my $have_ascii;
32             my $have_utf8;
33             use constant::defer _check_help => sub { # run once only
34 1         14 my ($class) = @_;
35 1         5 my $help = $class->_run_version (['html2text', '-help']);
36 1   33     4 $have_ascii = (defined $help && $help =~ /-ascii/);
37 1   33     3 $have_utf8 = (defined $help && $help =~ /-utf8/);
38 1         3 return undef;
39 1     1   525 };
  1         883  
  1         13  
40              
41             # return true if the "-ascii" option is available (new in html2text
42             # version 1.3.2 from Jan 2004)
43             sub _have_ascii {
44 1     1   373 my ($class) = @_;
45 1         4 $class->_check_help();
46 1         29 return $have_ascii;
47             }
48              
49             # return true if the "-utf8" option is available (a Debian addition circa 2009)
50             sub _have_utf8 {
51 0     0   0 my ($class) = @_;
52 0         0 $class->_check_help();
53 0         0 return $have_utf8;
54             }
55              
56             # The Debian -utf8 option can give UTF-8 output.
57             # For input, entitized text is believed to be the only way to be confident
58             # of working with both the original and the Debian-extended html2text.
59             #
60 1     1   132 use constant _WIDE_INPUT_CHARSET => 'entitize';
  1         1  
  1         426  
61             sub _WIDE_OUTPUT_CHARSET {
62 0     0   0 my ($class) = @_;
63 0 0       0 return ($class->_have_utf8() ? 'UTF-8' : 'iso-8859-1');
64             }
65              
66             sub program_full_version {
67 5     5 1 1191 my ($self_or_class) = @_;
68 5         24 return $self_or_class->_run_version (['html2text','-version'], '2>&1');
69             }
70             sub program_version {
71 2     2 1 247 my ($self_or_class) = @_;
72 2         6 my $version = $self_or_class->program_full_version;
73 2 50       5 if (! defined $version) { return undef; }
  2         5  
74              
75             # eg. "This is html2text, version 1.3.2a"
76 0 0         $version =~ /^.*version (.*)/
77             or $version =~ /^(.*)/; # whole first line if format not recognised
78 0           return $1 . substr($version,0,0); # retain taintedness
79             }
80              
81             sub _make_run {
82 0     0     my ($class, $input_filename, $options) = @_;
83              
84             # -nobs means don't do underlining with "_ backspace X" sequences.
85             # Backspaces are fun for teletype output, but the intention here is plain
86             # text. The Debian html2text has -nobs by default anyway.
87             #
88 0           my @command = ('html2text', '-nobs');
89              
90 0 0         if (defined $options->{'_width'}) {
91 0           push @command, '-width', $options->{'_width'};
92             }
93              
94 0 0         if ($class->_have_ascii) {
95 0 0         if (my $output_charset = $options->{'output_charset'}) {
96 0           $output_charset = lc($output_charset);
97 0 0 0       if ($output_charset eq 'ascii' || $output_charset eq 'ansi_x3.4-1968') {
98 0           push @command, '-ascii';
99             }
100             }
101             }
102              
103             # 'html2text_options' not documented ...
104 0 0         push @command, @{$options->{'html2text_options'} || []};
  0            
105              
106             # "html2text -" input filename "-" means read standard input.
107             # Any other "-foo" starting with "-" is an option and there's no apparent
108             # "--" to mark the end of options (as of its version 1.3.2a).
109             #
110             # Normally html2text takes URL style file: or http:, but the Debian
111             # version mangles it to a bare filename only. This makes it hard to
112             # escape a name suitably to get through both. Instead use standard input
113             # which both versions read by default.
114              
115 0           return (\@command,
116             '<', $input_filename);
117             }
118              
119             1;
120             __END__