| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# Copyright 2008, 2009, 2010, 2011, 2012, 2013, 2015 Kevin Ryde |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# HTML-FormatExternal is free software; you can redistribute it and/or |
|
4
|
|
|
|
|
|
|
# modify it under the terms of the GNU General Public License as published |
|
5
|
|
|
|
|
|
|
# by the Free Software Foundation; either version 3, or (at your option) any |
|
6
|
|
|
|
|
|
|
# later version. |
|
7
|
|
|
|
|
|
|
# |
|
8
|
|
|
|
|
|
|
# HTML-FormatExternal is distributed in the hope that it will be useful, but |
|
9
|
|
|
|
|
|
|
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
|
10
|
|
|
|
|
|
|
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
11
|
|
|
|
|
|
|
# for more details. |
|
12
|
|
|
|
|
|
|
# |
|
13
|
|
|
|
|
|
|
# You should have received a copy of the GNU General Public License along |
|
14
|
|
|
|
|
|
|
# with HTML-FormatExternal. If not, see . |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# Maybe: |
|
19
|
|
|
|
|
|
|
# capture error output |
|
20
|
|
|
|
|
|
|
# errors_to => \$var |
|
21
|
|
|
|
|
|
|
# combine error messages |
|
22
|
|
|
|
|
|
|
# |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
package HTML::FormatExternal; |
|
26
|
5
|
|
|
5
|
|
1498
|
use 5.006; |
|
|
5
|
|
|
|
|
10
|
|
|
|
5
|
|
|
|
|
131
|
|
|
27
|
5
|
|
|
5
|
|
16
|
use strict; |
|
|
5
|
|
|
|
|
4
|
|
|
|
5
|
|
|
|
|
96
|
|
|
28
|
5
|
|
|
5
|
|
13
|
use warnings; |
|
|
5
|
|
|
|
|
5
|
|
|
|
5
|
|
|
|
|
147
|
|
|
29
|
5
|
|
|
5
|
|
17
|
use Carp; |
|
|
5
|
|
|
|
|
7
|
|
|
|
5
|
|
|
|
|
234
|
|
|
30
|
5
|
|
|
5
|
|
16
|
use File::Spec 0.80; # version 0.80 of perl 5.6.0 or thereabouts for devnull() |
|
|
5
|
|
|
|
|
82
|
|
|
|
5
|
|
|
|
|
76
|
|
|
31
|
5
|
|
|
5
|
|
3642
|
use IPC::Run; |
|
|
5
|
|
|
|
|
144701
|
|
|
|
5
|
|
|
|
|
467
|
|
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# uncomment this to run the ### lines |
|
34
|
|
|
|
|
|
|
# use Smart::Comments; |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
our $VERSION = 23; |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
sub new { |
|
39
|
14
|
|
|
14
|
1
|
11220
|
my ($class, %self) = @_; |
|
40
|
14
|
|
|
|
|
45
|
return bless \%self, $class; |
|
41
|
|
|
|
|
|
|
} |
|
42
|
|
|
|
|
|
|
sub format { |
|
43
|
0
|
|
|
0
|
1
|
0
|
my ($self, $html) = @_; |
|
44
|
0
|
0
|
|
|
|
0
|
if (ref $html) { $html = $html->as_HTML; } |
|
|
0
|
|
|
|
|
0
|
|
|
45
|
0
|
|
|
|
|
0
|
return $self->format_string ($html, %$self); |
|
46
|
|
|
|
|
|
|
} |
|
47
|
|
|
|
|
|
|
|
|
48
|
5
|
|
|
5
|
|
34
|
use constant _WIDE_INPUT_CHARSET => 'UTF-8'; |
|
|
5
|
|
|
|
|
7
|
|
|
|
5
|
|
|
|
|
287
|
|
|
49
|
5
|
|
|
5
|
|
19
|
use constant _WIDE_OUTPUT_CHARSET => 'UTF-8'; |
|
|
5
|
|
|
|
|
6
|
|
|
|
5
|
|
|
|
|
4491
|
|
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# format_string() takes the easy approach of putting the string in a temp |
|
52
|
|
|
|
|
|
|
# file and letting format_file() do the real work. The formatter programs |
|
53
|
|
|
|
|
|
|
# can generally read stdin and write stdout, so might do that with select() |
|
54
|
|
|
|
|
|
|
# to simultaneously write and read back. |
|
55
|
|
|
|
|
|
|
# |
|
56
|
|
|
|
|
|
|
sub format_string { |
|
57
|
0
|
|
|
0
|
1
|
0
|
my ($class, $html_str, %options) = @_; |
|
58
|
|
|
|
|
|
|
|
|
59
|
0
|
|
|
|
|
0
|
my $fh = _tempfile(); |
|
60
|
0
|
|
|
|
|
0
|
my $input_wide = eval { utf8::is_utf8($html_str) }; |
|
|
0
|
|
|
|
|
0
|
|
|
61
|
0
|
|
|
|
|
0
|
_output_wide(\%options, $input_wide); |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# insert while in wide chars |
|
64
|
0
|
0
|
|
|
|
0
|
if (defined $options{'base'}) { |
|
65
|
0
|
|
|
|
|
0
|
$html_str = _base_prefix(\%options, $html_str, $input_wide); |
|
66
|
|
|
|
|
|
|
} |
|
67
|
|
|
|
|
|
|
|
|
68
|
0
|
0
|
|
|
|
0
|
if ($input_wide) { |
|
69
|
0
|
0
|
|
|
|
0
|
if (! $options{'input_charset'}) { |
|
70
|
0
|
|
|
|
|
0
|
$options{'input_charset'} = $class->_WIDE_INPUT_CHARSET; |
|
71
|
|
|
|
|
|
|
} |
|
72
|
|
|
|
|
|
|
### input_charset for wide: $options{'input_charset'} |
|
73
|
0
|
0
|
|
|
|
0
|
if ($options{'input_charset'} eq 'entitize') { |
|
74
|
0
|
|
|
|
|
0
|
$html_str = _entitize($html_str); |
|
75
|
0
|
|
|
|
|
0
|
delete $options{'input_charset'}; |
|
76
|
|
|
|
|
|
|
} else { |
|
77
|
0
|
|
|
|
|
0
|
my $layer = ":encoding($options{'input_charset'})"; |
|
78
|
0
|
0
|
|
|
|
0
|
binmode ($fh, $layer) or die 'Cannot add layer ',$layer; |
|
79
|
|
|
|
|
|
|
} |
|
80
|
|
|
|
|
|
|
} |
|
81
|
|
|
|
|
|
|
|
|
82
|
0
|
0
|
|
|
|
0
|
do { |
|
83
|
0
|
0
|
|
|
|
0
|
print $fh $html_str |
|
84
|
|
|
|
|
|
|
and close($fh) |
|
85
|
|
|
|
|
|
|
} || die 'Cannot write temp file: ',$!; |
|
86
|
|
|
|
|
|
|
|
|
87
|
0
|
|
|
|
|
0
|
return $class->format_file ($fh->filename, %options); |
|
88
|
|
|
|
|
|
|
} |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# Left margin is synthesized by adding spaces afterwards because the various |
|
91
|
|
|
|
|
|
|
# programs have pretty variable support for a specified margin. |
|
92
|
|
|
|
|
|
|
# * w3m doesn't seem to have a left margin option at all |
|
93
|
|
|
|
|
|
|
# * lynx has one but it's too well hidden in its style sheet or something |
|
94
|
|
|
|
|
|
|
# * elinks has document.browse.margin_width but it's limited to 8 or so |
|
95
|
|
|
|
|
|
|
# * netrik doesn't seem to have one at all |
|
96
|
|
|
|
|
|
|
# * vilistextum has a "spaces" internally for lists etc but no apparent |
|
97
|
|
|
|
|
|
|
# way to initialize from the command line |
|
98
|
|
|
|
|
|
|
# |
|
99
|
|
|
|
|
|
|
sub format_file { |
|
100
|
0
|
|
|
0
|
1
|
0
|
my ($class, $filename, %options) = @_; |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
# If neither leftmargin nor rightmargin are specified then '_width' is |
|
103
|
|
|
|
|
|
|
# unset and the _make_run() funcs leave it to the program defaults. |
|
104
|
|
|
|
|
|
|
# |
|
105
|
|
|
|
|
|
|
# If either leftmargin or rightmargin are set then '_width' is established |
|
106
|
|
|
|
|
|
|
# and the _make_run() funcs use it and and zero left margin, then the |
|
107
|
|
|
|
|
|
|
# actual left margin is applied below. |
|
108
|
|
|
|
|
|
|
# |
|
109
|
|
|
|
|
|
|
# The DEFAULT_LEFTMARGIN and DEFAULT_RIGHTMARGIN establish the defaults |
|
110
|
|
|
|
|
|
|
# when just one of the two is set. Not good hard coding those values, |
|
111
|
|
|
|
|
|
|
# but the programs don't have anything to set one but not the other. |
|
112
|
|
|
|
|
|
|
# |
|
113
|
0
|
|
|
|
|
0
|
my $leftmargin = $options{'leftmargin'}; |
|
114
|
0
|
|
|
|
|
0
|
my $rightmargin = $options{'rightmargin'}; |
|
115
|
0
|
0
|
0
|
|
|
0
|
if (defined $leftmargin || defined $rightmargin) { |
|
116
|
0
|
0
|
|
|
|
0
|
if (! defined $leftmargin) { $leftmargin = $class->DEFAULT_LEFTMARGIN; } |
|
|
0
|
|
|
|
|
0
|
|
|
117
|
0
|
0
|
|
|
|
0
|
if (! defined $rightmargin) { $rightmargin = $class->DEFAULT_RIGHTMARGIN; } |
|
|
0
|
|
|
|
|
0
|
|
|
118
|
0
|
|
|
|
|
0
|
$options{'_width'} = $rightmargin - $leftmargin; |
|
119
|
|
|
|
|
|
|
} |
|
120
|
|
|
|
|
|
|
|
|
121
|
0
|
|
|
|
|
0
|
_output_wide(\%options, 0); # file input is reckoned as not wide |
|
122
|
0
|
0
|
|
|
|
0
|
if ($options{'output_wide'}) { |
|
123
|
0
|
|
0
|
|
|
0
|
$options{'output_charset'} ||= $class->_WIDE_OUTPUT_CHARSET; |
|
124
|
|
|
|
|
|
|
} |
|
125
|
|
|
|
|
|
|
|
|
126
|
0
|
|
|
|
|
0
|
my $tempfh; |
|
127
|
0
|
0
|
|
|
|
0
|
if (defined $options{'base'}) { |
|
128
|
|
|
|
|
|
|
# insert by copying to a temp file |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
# File::Copy rudely calls eq() to compare $from and $to. Need either |
|
131
|
|
|
|
|
|
|
# File::Temp 0.18 to have that work on $tempfh, or File::Copy 2.??? for |
|
132
|
|
|
|
|
|
|
# it to check an overload method exists first. Newer File::Temp is |
|
133
|
|
|
|
|
|
|
# available from cpan, where File::Copy may not be, so ask for |
|
134
|
|
|
|
|
|
|
# File::Temp 0.18. |
|
135
|
0
|
|
|
|
|
0
|
require File::Temp; |
|
136
|
0
|
|
|
|
|
0
|
File::Temp->VERSION(0.18); |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# must sysread()/syswrite() because that's what File::Copy does (as of |
|
139
|
|
|
|
|
|
|
# its version 2.30) so anything held in the perl buffering by the normal |
|
140
|
|
|
|
|
|
|
# read() is lost. |
|
141
|
|
|
|
|
|
|
|
|
142
|
0
|
|
|
|
|
0
|
my $initial; |
|
143
|
|
|
|
|
|
|
my $fh; |
|
144
|
0
|
0
|
|
|
|
0
|
do { |
|
145
|
0
|
0
|
0
|
|
|
0
|
open $fh, '<', $filename |
|
146
|
|
|
|
|
|
|
and binmode $fh |
|
147
|
|
|
|
|
|
|
and defined (sysread $fh, $initial, 4) |
|
148
|
|
|
|
|
|
|
} || croak "Cannot open $filename: $!"; |
|
149
|
|
|
|
|
|
|
### $initial |
|
150
|
|
|
|
|
|
|
|
|
151
|
0
|
|
|
|
|
0
|
$initial = _base_prefix(\%options, $initial, 0); |
|
152
|
|
|
|
|
|
|
|
|
153
|
0
|
|
|
|
|
0
|
$tempfh = _tempfile(); |
|
154
|
0
|
|
|
|
|
0
|
$tempfh->autoflush(1); |
|
155
|
0
|
|
|
|
|
0
|
require File::Copy; |
|
156
|
0
|
0
|
|
|
|
0
|
do { |
|
157
|
0
|
0
|
0
|
|
|
0
|
defined(syswrite($tempfh, $initial)) |
|
|
|
|
0
|
|
|
|
|
|
158
|
|
|
|
|
|
|
and File::Copy::copy($fh, $tempfh) |
|
159
|
|
|
|
|
|
|
and close $tempfh |
|
160
|
|
|
|
|
|
|
and close $fh |
|
161
|
|
|
|
|
|
|
} || croak "Cannot copy $filename to temp file: $!"; |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
|
|
164
|
0
|
|
|
|
|
0
|
$filename = $tempfh->filename; |
|
165
|
|
|
|
|
|
|
} |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
# # dump the file being crunched |
|
168
|
|
|
|
|
|
|
# print "Bytes passed to program:\n"; |
|
169
|
|
|
|
|
|
|
# IPC::Run::run(['hd'], '<',$filename, '|',['cat']); |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
# _make_run() can set $options{'ENV'} too |
|
172
|
0
|
|
|
|
|
0
|
my ($command_aref, @run) = $class->_make_run($filename, \%options); |
|
173
|
0
|
|
0
|
|
|
0
|
my $env = $options{'ENV'} || {}; |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
### $command_aref |
|
176
|
|
|
|
|
|
|
### @run |
|
177
|
|
|
|
|
|
|
### $env |
|
178
|
|
|
|
|
|
|
|
|
179
|
0
|
0
|
|
|
|
0
|
if (! @run) { |
|
180
|
0
|
|
|
|
|
0
|
push @run, '<', File::Spec->devnull; |
|
181
|
|
|
|
|
|
|
} |
|
182
|
|
|
|
|
|
|
|
|
183
|
0
|
|
|
|
|
0
|
my $str; |
|
184
|
|
|
|
|
|
|
{ |
|
185
|
0
|
|
|
|
|
0
|
local %ENV = (%ENV, %$env); # overrides from _make_command() |
|
|
0
|
|
|
|
|
0
|
|
|
186
|
0
|
|
|
|
|
0
|
eval { IPC::Run::run($command_aref, |
|
|
0
|
|
|
|
|
0
|
|
|
187
|
|
|
|
|
|
|
@run, |
|
188
|
|
|
|
|
|
|
'>', \$str, |
|
189
|
|
|
|
|
|
|
# FIXME: what to do with stderr ? |
|
190
|
|
|
|
|
|
|
# '2>', File::Spec->devnull, |
|
191
|
|
|
|
|
|
|
) }; |
|
192
|
|
|
|
|
|
|
} |
|
193
|
0
|
|
|
|
|
0
|
_die_on_insecure(); |
|
194
|
|
|
|
|
|
|
### $str |
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
### final output_wide: $options{'output_wide'} |
|
197
|
0
|
0
|
|
|
|
0
|
if ($options{'output_wide'}) { |
|
198
|
0
|
|
|
|
|
0
|
require Encode; |
|
199
|
0
|
|
|
|
|
0
|
$str = Encode::decode ($options{'output_charset'}, $str); |
|
200
|
|
|
|
|
|
|
} |
|
201
|
|
|
|
|
|
|
|
|
202
|
0
|
0
|
|
|
|
0
|
if ($leftmargin) { |
|
203
|
0
|
|
|
|
|
0
|
my $fill = ' ' x $leftmargin; |
|
204
|
0
|
|
|
|
|
0
|
$str =~ s/^(.)/$fill$1/mg; # non-empty lines only |
|
205
|
|
|
|
|
|
|
} |
|
206
|
0
|
|
|
|
|
0
|
return $str; |
|
207
|
|
|
|
|
|
|
} |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
# most program running errors are quietly ignored for now, but re-throw |
|
210
|
|
|
|
|
|
|
# "Insecure $ENV{PATH}" when cannot run due to taintedness. |
|
211
|
|
|
|
|
|
|
sub _die_on_insecure { |
|
212
|
0
|
0
|
|
0
|
|
0
|
if ($@ =~ /^Insecure/) { |
|
213
|
0
|
|
|
|
|
0
|
die $@; |
|
214
|
|
|
|
|
|
|
} |
|
215
|
|
|
|
|
|
|
} |
|
216
|
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
sub _run_version { |
|
218
|
39
|
|
|
39
|
|
53
|
my ($self_or_class, $command_aref, @ipc_options) = @_; |
|
219
|
|
|
|
|
|
|
### _run_version() ... |
|
220
|
|
|
|
|
|
|
### $command_aref |
|
221
|
|
|
|
|
|
|
### @ipc_options |
|
222
|
|
|
|
|
|
|
|
|
223
|
39
|
100
|
|
|
|
77
|
if (! @ipc_options) { |
|
224
|
29
|
|
|
|
|
102
|
@ipc_options = ('2>', File::Spec->devnull); |
|
225
|
|
|
|
|
|
|
} |
|
226
|
|
|
|
|
|
|
|
|
227
|
39
|
|
|
|
|
32
|
my $version; # left undef if any exec/slurp problem |
|
228
|
39
|
|
|
|
|
35
|
eval { IPC::Run::run($command_aref, |
|
|
39
|
|
|
|
|
120
|
|
|
229
|
|
|
|
|
|
|
'<', File::Spec->devnull, |
|
230
|
|
|
|
|
|
|
'>', \$version, |
|
231
|
|
|
|
|
|
|
@ipc_options) }; |
|
232
|
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
# strip blank lines at end of lynx, maybe others |
|
234
|
39
|
50
|
|
|
|
33619
|
if (defined $version) { $version =~ s/\n+$/\n/s; } |
|
|
0
|
|
|
|
|
0
|
|
|
235
|
39
|
|
|
|
|
90
|
return $version; |
|
236
|
|
|
|
|
|
|
} |
|
237
|
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
# return a File::Temp filehandle object |
|
239
|
|
|
|
|
|
|
sub _tempfile { |
|
240
|
0
|
|
|
0
|
|
|
require File::Temp; |
|
241
|
0
|
|
|
|
|
|
my $fh = File::Temp->new (TEMPLATE => 'HTML-FormatExternal-XXXXXX', |
|
242
|
|
|
|
|
|
|
SUFFIX => '.html', |
|
243
|
|
|
|
|
|
|
TMPDIR => 1); |
|
244
|
0
|
0
|
|
|
|
|
binmode($fh) or die 'Oops, cannot set binmode() on temp file'; |
|
245
|
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
### tempfile: $fh->filename |
|
247
|
|
|
|
|
|
|
# $fh->unlink_on_destroy(0); # to preserve for debugging ... |
|
248
|
|
|
|
|
|
|
|
|
249
|
0
|
|
|
|
|
|
return $fh; |
|
250
|
|
|
|
|
|
|
} |
|
251
|
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
sub _output_wide { |
|
253
|
0
|
|
|
0
|
|
|
my ($options, $input_wide) = @_; |
|
254
|
0
|
0
|
0
|
|
|
|
if (! defined $options->{'output_wide'} |
|
255
|
|
|
|
|
|
|
|| $options->{'output_wide'} eq 'as_input') { |
|
256
|
0
|
|
|
|
|
|
$options->{'output_wide'} = $input_wide; |
|
257
|
|
|
|
|
|
|
} |
|
258
|
|
|
|
|
|
|
} |
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
# $str is HTML or some initial bytes. |
|
261
|
|
|
|
|
|
|
# Return a new string with at the start. |
|
262
|
|
|
|
|
|
|
# |
|
263
|
|
|
|
|
|
|
sub _base_prefix { |
|
264
|
0
|
|
|
0
|
|
|
my ($options, $str, $input_wide) = @_; |
|
265
|
0
|
|
|
|
|
|
my $base = delete $options->{'base'}; |
|
266
|
|
|
|
|
|
|
### _base_prefix: $base |
|
267
|
|
|
|
|
|
|
|
|
268
|
0
|
|
|
|
|
|
$base = "$base"; # stringize possible URI object |
|
269
|
0
|
|
|
|
|
|
$base = _entitize($base); # probably shouldn't be any non-ascii in a url |
|
270
|
0
|
|
|
|
|
|
$base = "\n"; |
|
271
|
|
|
|
|
|
|
|
|
272
|
0
|
|
|
|
|
|
my $pos = 0; |
|
273
|
0
|
0
|
|
|
|
|
unless ($input_wide) { |
|
274
|
|
|
|
|
|
|
# encode $base in the input_charset, and possibly after a BOM. |
|
275
|
|
|
|
|
|
|
# |
|
276
|
|
|
|
|
|
|
# Lynx recognises a BOM, if it doesn't have other -assume_charset. It |
|
277
|
|
|
|
|
|
|
# recognises it only at the start of the file, so must insert |
|
278
|
|
|
|
|
|
|
# after it here to preserve that feature of Lynx. |
|
279
|
|
|
|
|
|
|
# |
|
280
|
|
|
|
|
|
|
# If input_charset is utf-32 or utf-16 then it seems reasonable to step |
|
281
|
|
|
|
|
|
|
# over any BOM. But Lynx for some reason doesn't like a BOM together |
|
282
|
|
|
|
|
|
|
# with utf-32 or utf-16 specified. Dunno if that's a bug or a feature |
|
283
|
|
|
|
|
|
|
# on its part. |
|
284
|
|
|
|
|
|
|
|
|
285
|
0
|
|
|
|
|
|
my $input_charset = $options->{'input_charset'}; |
|
286
|
0
|
0
|
0
|
|
|
|
if (! defined $input_charset || lc($input_charset) eq 'utf-32') { |
|
287
|
0
|
0
|
|
|
|
|
if ($str =~ /^\000\000\376\377/) { |
|
|
|
0
|
|
|
|
|
|
|
288
|
0
|
|
|
|
|
|
$input_charset = 'utf-32be'; |
|
289
|
0
|
|
|
|
|
|
$pos = 4; |
|
290
|
|
|
|
|
|
|
} elsif ($str =~ /^\377\376\000\000/) { |
|
291
|
0
|
|
|
|
|
|
$input_charset = 'utf-32le'; |
|
292
|
0
|
|
|
|
|
|
$pos = 4; |
|
293
|
|
|
|
|
|
|
} |
|
294
|
|
|
|
|
|
|
} |
|
295
|
0
|
0
|
0
|
|
|
|
if (! defined $input_charset || lc($input_charset) eq 'utf-16') { |
|
296
|
0
|
0
|
|
|
|
|
if ($str =~ /^\376\377/) { |
|
|
|
0
|
|
|
|
|
|
|
297
|
0
|
|
|
|
|
|
$input_charset = 'utf-16be'; |
|
298
|
0
|
|
|
|
|
|
$pos = 4; |
|
299
|
|
|
|
|
|
|
} elsif ($str =~ /^\377\376/) { |
|
300
|
0
|
|
|
|
|
|
$input_charset = 'utf-16le'; |
|
301
|
0
|
|
|
|
|
|
$pos = 2; |
|
302
|
|
|
|
|
|
|
} |
|
303
|
|
|
|
|
|
|
} |
|
304
|
0
|
0
|
|
|
|
|
if (defined $input_charset) { |
|
305
|
|
|
|
|
|
|
# encode() errors out if unknown charset, and doesn't exist for older |
|
306
|
|
|
|
|
|
|
# Perl, in which case leave $base as ascii. May not be right, but |
|
307
|
|
|
|
|
|
|
# ought to work with the various ASCII superset encodings. |
|
308
|
0
|
|
|
|
|
|
eval { |
|
309
|
0
|
|
|
|
|
|
require Encode; |
|
310
|
0
|
|
|
|
|
|
$base = Encode::encode ($input_charset, $base); |
|
311
|
|
|
|
|
|
|
}; |
|
312
|
|
|
|
|
|
|
} |
|
313
|
|
|
|
|
|
|
} |
|
314
|
0
|
|
|
|
|
|
substr($str, $pos,0, $base); # insert $base at $pos |
|
315
|
0
|
|
|
|
|
|
return $str; |
|
316
|
|
|
|
|
|
|
} |
|
317
|
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
# return $str with non-ascii replaced by { entities |
|
319
|
|
|
|
|
|
|
sub _entitize { |
|
320
|
0
|
|
|
0
|
|
|
my ($str) = @_; |
|
321
|
0
|
|
|
|
|
|
$str =~ s{([^\x20-\x7E])}{''.ord($1).';'}eg; |
|
|
0
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
### $str |
|
323
|
0
|
|
|
|
|
|
return $str; |
|
324
|
|
|
|
|
|
|
} |
|
325
|
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
1; |
|
327
|
|
|
|
|
|
|
__END__ |