| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# License: http://creativecommons.org/publicdomain/zero/1.0/ |
|
2
|
|
|
|
|
|
|
# (CC0 or Public Domain). To the extent possible under law, the author, |
|
3
|
|
|
|
|
|
|
# Jim Avera (email jim.avera at gmail dot com) has waived all copyright and |
|
4
|
|
|
|
|
|
|
# related or neighboring rights to this document. Attribution is requested |
|
5
|
|
|
|
|
|
|
# but not required. |
|
6
|
3
|
|
|
3
|
|
22
|
use strict; use warnings FATAL => 'all'; use utf8; |
|
|
3
|
|
|
3
|
|
8
|
|
|
|
3
|
|
|
3
|
|
152
|
|
|
|
3
|
|
|
|
|
23
|
|
|
|
3
|
|
|
|
|
17
|
|
|
|
3
|
|
|
|
|
127
|
|
|
|
3
|
|
|
|
|
17
|
|
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
35
|
|
|
7
|
3
|
|
|
3
|
|
100
|
use feature qw(say state lexical_subs current_sub); |
|
|
3
|
|
|
|
|
13
|
|
|
|
3
|
|
|
|
|
260
|
|
|
8
|
3
|
|
|
3
|
|
19
|
no warnings qw(experimental::lexical_subs); |
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
2303
|
|
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
package Spreadsheet::Edit::IO; |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# Allow "use <thismodule. VERSION ..." in development sandbox to not bomb |
|
13
|
3
|
|
|
3
|
|
20
|
{ no strict 'refs'; ${__PACKAGE__."::VER"."SION"} = 1999.999; } |
|
|
3
|
|
|
|
|
5
|
|
|
|
3
|
|
|
|
|
267
|
|
|
14
|
|
|
|
|
|
|
our $VERSION = '1000.008'; # VERSION from Dist::Zilla::Plugin::OurPkgVersion |
|
15
|
|
|
|
|
|
|
our $DATE = '2023-09-19'; # DATE from Dist::Zilla::Plugin::OurDate |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# This module is derived from the old never-released Text:CSV::Spreadsheet |
|
18
|
|
|
|
|
|
|
|
|
19
|
3
|
|
|
3
|
|
25
|
use Exporter 'import'; |
|
|
3
|
|
|
|
|
5
|
|
|
|
3
|
|
|
|
|
213
|
|
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
our @EXPORT = qw/convert_spreadsheet OpenAsCsv cx2let let2cx cxrx2sheetaddr |
|
22
|
|
|
|
|
|
|
sheetname_from_spec filepath_from_spec |
|
23
|
|
|
|
|
|
|
form_spec_with_sheetname/; |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
our @EXPORT_OK = qw/can_cvt_spreadsheets can_extract_allsheets can_extract_named_sheet |
|
26
|
|
|
|
|
|
|
openlibreoffice_path |
|
27
|
|
|
|
|
|
|
@sane_CSV_read_options @sane_CSV_write_options/; |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# TODO: Provide "known_attributes" function ala Text::CSV::known_attributes() |
|
30
|
|
|
|
|
|
|
|
|
31
|
3
|
|
|
3
|
|
1612
|
use version (); |
|
|
3
|
|
|
|
|
6575
|
|
|
|
3
|
|
|
|
|
98
|
|
|
32
|
3
|
|
|
3
|
|
19
|
use Carp; |
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
182
|
|
|
33
|
|
|
|
|
|
|
|
|
34
|
3
|
|
|
3
|
|
19
|
use File::Find (); |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
67
|
|
|
35
|
3
|
|
|
3
|
|
1707
|
use File::Copy (); |
|
|
3
|
|
|
|
|
8606
|
|
|
|
3
|
|
|
|
|
91
|
|
|
36
|
3
|
|
|
3
|
|
1780
|
use File::Copy::Recursive (); |
|
|
3
|
|
|
|
|
14724
|
|
|
|
3
|
|
|
|
|
106
|
|
|
37
|
3
|
|
|
3
|
|
28
|
use File::Glob qw/bsd_glob/; |
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
367
|
|
|
38
|
|
|
|
|
|
|
|
|
39
|
3
|
|
|
3
|
|
920
|
use Path::Tiny qw/path/; |
|
|
3
|
|
|
|
|
14629
|
|
|
|
3
|
|
|
|
|
168
|
|
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# Path::Tiny OBVIATES NEED for many but we still need this |
|
42
|
3
|
|
|
3
|
|
62
|
use File::Spec (); |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
81
|
|
|
43
|
3
|
|
|
3
|
|
619
|
use File::Spec::Functions qw/devnull tmpdir rootdir catdir catfile/; |
|
|
3
|
|
|
|
|
1021
|
|
|
|
3
|
|
|
|
|
249
|
|
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
# Still sometimes convenient... |
|
46
|
3
|
|
|
3
|
|
25
|
use File::Basename qw(basename); |
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
172
|
|
|
47
|
|
|
|
|
|
|
|
|
48
|
3
|
|
|
3
|
|
1540
|
use File::Which qw/which/; |
|
|
3
|
|
|
|
|
3606
|
|
|
|
3
|
|
|
|
|
176
|
|
|
49
|
3
|
|
|
3
|
|
1565
|
use URI::file (); |
|
|
3
|
|
|
|
|
35299
|
|
|
|
3
|
|
|
|
|
98
|
|
|
50
|
3
|
|
|
3
|
|
24
|
use Guard qw(guard scope_guard); |
|
|
3
|
|
|
|
|
9
|
|
|
|
3
|
|
|
|
|
216
|
|
|
51
|
3
|
|
|
3
|
|
24
|
use Fcntl qw(:flock :seek); |
|
|
3
|
|
|
|
|
5
|
|
|
|
3
|
|
|
|
|
431
|
|
|
52
|
3
|
|
|
3
|
|
21
|
use Scalar::Util qw/blessed/; |
|
|
3
|
|
|
|
|
9
|
|
|
|
3
|
|
|
|
|
167
|
|
|
53
|
3
|
|
|
3
|
|
22
|
use List::Util qw/none all notall first min max/; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
261
|
|
|
54
|
3
|
|
|
3
|
|
35
|
use Encode qw(encode decode); |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
166
|
|
|
55
|
3
|
|
|
3
|
|
17
|
use File::Glob qw/bsd_glob GLOB_NOCASE/; |
|
|
3
|
|
|
|
|
22
|
|
|
|
3
|
|
|
|
|
215
|
|
|
56
|
3
|
|
|
3
|
|
22
|
use Digest::MD5 qw/md5_base64/; |
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
189
|
|
|
57
|
3
|
|
|
3
|
|
18
|
use Text::CSV (); |
|
|
3
|
|
|
|
|
16
|
|
|
|
3
|
|
|
|
|
170
|
|
|
58
|
|
|
|
|
|
|
# DDI 5.025 is needed for Windows-aware qsh() |
|
59
|
3
|
|
|
3
|
|
25
|
use Data::Dumper::Interp 5.025 qw/vis visq dvis dvisq ivis ivisq avis qsh qshlist u/; |
|
|
3
|
|
|
|
|
75
|
|
|
|
3
|
|
|
|
|
35
|
|
|
60
|
|
|
|
|
|
|
|
|
61
|
3
|
|
|
|
|
51
|
use Spreadsheet::Edit::Log qw/log_call fmt_call log_methcall fmt_methcall oops/, |
|
62
|
3
|
|
|
3
|
|
1056
|
':btw=IO${lno}:'; |
|
|
3
|
|
|
|
|
9
|
|
|
63
|
|
|
|
|
|
|
our %SpreadsheetEdit_Log_Options = ( |
|
64
|
|
|
|
|
|
|
is_public_api => sub{ |
|
65
|
|
|
|
|
|
|
$_[1][3] =~ /(?: ::|^ )(?: [a-z][^:]* | OpenAsCsv | ConvertSpreadsheet )$/x |
|
66
|
|
|
|
|
|
|
}, |
|
67
|
|
|
|
|
|
|
); |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
my $progname = path($0)->basename; |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
sub _get_username(;$) { |
|
72
|
4
|
|
|
4
|
|
9
|
my ($uid) = @_; |
|
73
|
4
|
|
50
|
|
|
31
|
$uid //= eval{ $> } // -1; # default to EUID |
|
|
4
|
|
33
|
|
|
62
|
|
|
74
|
4
|
|
|
|
|
10
|
state $answer = {}; |
|
75
|
4
|
|
66
|
|
|
28
|
return $answer->{$uid} //= do { |
|
76
|
|
|
|
|
|
|
# https://stackoverflow.com/questions/12081246/how-to-get-system-user-full-name-on-windows-in-perl |
|
77
|
3
|
|
33
|
|
|
2830
|
eval { getpwuid($uid) // $uid } |
|
78
|
|
|
|
|
|
|
|| |
|
79
|
3
|
0
|
0
|
|
|
7
|
($^O =~ /MSWin/ && $uid == (eval{$>}//-1) && eval{ # untested... |
|
|
0
|
|
0
|
|
|
0
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
80
|
0
|
|
|
|
|
0
|
require Win32API::Net; |
|
81
|
0
|
|
0
|
|
|
0
|
Win32API::Net::UserGetInfo($ENV{LOGONSERVER}||'',Win32::LoginName(),10,my $info={}); |
|
82
|
|
|
|
|
|
|
$info->{fullName} |
|
83
|
0
|
|
|
|
|
0
|
}) |
|
84
|
|
|
|
|
|
|
|| |
|
85
|
|
|
|
|
|
|
"UID$uid" |
|
86
|
|
|
|
|
|
|
}; |
|
87
|
|
|
|
|
|
|
} |
|
88
|
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# A private Libre/Open Office profile dir is needed to avoid conflicts |
|
91
|
|
|
|
|
|
|
# with interactive sessions, see |
|
92
|
|
|
|
|
|
|
# https://ask.libreoffice.org/en/question/290306/how-to-start-independent-lo-instance-process |
|
93
|
|
|
|
|
|
|
# |
|
94
|
|
|
|
|
|
|
# We use a persistent profile dir shared among processes for a given user |
|
95
|
|
|
|
|
|
|
# (actually one for each unique external tool which needs one). |
|
96
|
|
|
|
|
|
|
# Sharing is okay because we get an exclusive lock before actually using it. |
|
97
|
|
|
|
|
|
|
state $profile_parent_dir = do{ # also used for lockfile |
|
98
|
|
|
|
|
|
|
my $user = _get_username(); |
|
99
|
|
|
|
|
|
|
my $dname = __PACKAGE__."_${user}_profileparent"; |
|
100
|
|
|
|
|
|
|
$dname =~ s/::/-/g; |
|
101
|
|
|
|
|
|
|
$dname =~ s/[^-\w.:]/_/g; |
|
102
|
|
|
|
|
|
|
(my $path = path(File::Spec->tmpdir)->child($dname))->mkpath; |
|
103
|
|
|
|
|
|
|
$path # Path::Tiny |
|
104
|
|
|
|
|
|
|
}; |
|
105
|
|
|
|
|
|
|
sub _get_tool_profdir($) { |
|
106
|
0
|
|
|
0
|
|
0
|
my ($tool_path) = @_; |
|
107
|
0
|
|
|
|
|
0
|
my $fingerprint = _file_fingerprint($tool_path); |
|
108
|
0
|
|
|
|
|
0
|
(my $toolname = path($tool_path)->basename(qw/\.\w+$/)) =~ s/[^-\w.:]/_/g; |
|
109
|
0
|
|
|
|
|
0
|
my $path = $profile_parent_dir->child("${toolname}_$fingerprint"); |
|
110
|
0
|
|
|
|
|
0
|
$path->mkpath; |
|
111
|
0
|
|
|
|
|
0
|
$path |
|
112
|
|
|
|
|
|
|
} |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# Prevent concurrent document conversions. |
|
115
|
|
|
|
|
|
|
# LO & OO can't handle concurrent access to the same profile. |
|
116
|
|
|
|
|
|
|
my $locked_fh; |
|
117
|
|
|
|
|
|
|
sub _get_exclusive_lock($) { # returns lock object |
|
118
|
0
|
|
|
0
|
|
0
|
my $opts = shift; |
|
119
|
0
|
|
|
|
|
0
|
my $lockfile_path = $profile_parent_dir->child("LOCKFILE")->canonpath; |
|
120
|
0
|
|
|
|
|
0
|
my $sleeptime = 1; |
|
121
|
0
|
|
|
|
|
0
|
my $lock_fh; |
|
122
|
0
|
|
|
|
|
0
|
while (! defined $lock_fh) { |
|
123
|
|
|
|
|
|
|
#warn "$$ : ### AAA open $lockfile_path ...\n"; |
|
124
|
0
|
0
|
|
|
|
0
|
open $lock_fh, "+>>", $lockfile_path or die $!; |
|
125
|
|
|
|
|
|
|
#warn "$$ : ### AA2 open succeeded.\n"; |
|
126
|
0
|
|
|
|
|
0
|
eval { chmod 0666, $lock_fh; }; # sometimes not implemented |
|
|
0
|
|
|
|
|
0
|
|
|
127
|
|
|
|
|
|
|
#warn "$$ : ### AA3 flock ...\n"; |
|
128
|
0
|
0
|
|
|
|
0
|
if (! flock($lock_fh, LOCK_EX|LOCK_NB)) { |
|
129
|
|
|
|
|
|
|
#warn "$$ : ### AA4 flock FAILED\n"; |
|
130
|
0
|
0
|
|
|
|
0
|
seek($lock_fh, 0, SEEK_SET) or die; |
|
131
|
0
|
|
|
|
|
0
|
my @lines = <$lock_fh>; |
|
132
|
0
|
0
|
|
|
|
0
|
close($lock_fh) or die "close:$!"; $lock_fh = undef; |
|
|
0
|
|
|
|
|
0
|
|
|
133
|
|
|
|
|
|
|
#warn "$$ : ### AA6 fh closed...\n"; |
|
134
|
0
|
|
0
|
|
|
0
|
my $owner = $lines[-1] // ""; # pid NNN (progname) |
|
135
|
0
|
0
|
|
|
|
0
|
{ my ($pid) = ($owner =~ /pid (\d+)/) or last; |
|
|
0
|
|
|
|
|
0
|
|
|
136
|
0
|
0
|
|
|
|
0
|
my @s = stat("/proc/$pid") or last; |
|
137
|
0
|
|
|
|
|
0
|
$owner = _get_username($s[4])." ".$owner; |
|
138
|
|
|
|
|
|
|
} |
|
139
|
0
|
0
|
|
|
|
0
|
my $ownermsg = $owner ? " held by $owner" : ""; |
|
140
|
|
|
|
|
|
|
# Carp::longmess ... |
|
141
|
|
|
|
|
|
|
warn ">> ($$) Waiting for exclusive lock${ownermsg}...\n", |
|
142
|
|
|
|
|
|
|
" $lockfile_path\n" |
|
143
|
0
|
0
|
|
|
|
0
|
unless $opts->{silent}; |
|
144
|
0
|
|
|
|
|
0
|
sleep $sleeptime; |
|
145
|
|
|
|
|
|
|
} else { |
|
146
|
0
|
|
|
|
|
0
|
$locked_fh = $lock_fh; |
|
147
|
0
|
0
|
|
|
|
0
|
seek($lock_fh, 0, SEEK_END) or die; |
|
148
|
0
|
|
|
|
|
0
|
print $lock_fh "pid $$ ($progname)\n"; # always appends anyway on *nix |
|
149
|
|
|
|
|
|
|
} |
|
150
|
|
|
|
|
|
|
} |
|
151
|
0
|
|
|
|
|
0
|
$opts->{lockfile_fh} = $lock_fh; |
|
152
|
|
|
|
|
|
|
#warn "$$ : ### GOT LOCK\n"; |
|
153
|
|
|
|
|
|
|
} |
|
154
|
|
|
|
|
|
|
END{ |
|
155
|
3
|
50
|
|
3
|
|
883
|
if (defined $locked_fh) { |
|
156
|
0
|
|
|
|
|
0
|
flock($locked_fh, LOCK_UN); |
|
157
|
0
|
|
|
|
|
0
|
close($locked_fh); |
|
158
|
0
|
|
|
|
|
0
|
$locked_fh = undef; |
|
159
|
0
|
|
|
|
|
0
|
warn "Did emergency unlock!\n"; |
|
160
|
|
|
|
|
|
|
} |
|
161
|
|
|
|
|
|
|
#else { warn "(emergency unlock not needed)\n"; } |
|
162
|
|
|
|
|
|
|
} |
|
163
|
|
|
|
|
|
|
sub _release_lock($) { |
|
164
|
0
|
|
|
0
|
|
0
|
my $opts = shift; |
|
165
|
0
|
|
0
|
|
|
0
|
my $fh = delete($opts->{lockfile_fh}) // oops; |
|
166
|
0
|
0
|
|
|
|
0
|
oops unless $fh == $locked_fh; |
|
167
|
|
|
|
|
|
|
#seek($fh, 0, SEEK_SET) or die; |
|
168
|
|
|
|
|
|
|
#my @x = (<$fh>); |
|
169
|
|
|
|
|
|
|
#seek($fh, 0, SEEK_SET) or die; |
|
170
|
|
|
|
|
|
|
#warn dvis "$$ : Lockfile contains: @x\n"; |
|
171
|
|
|
|
|
|
|
##warn "$$ : ###BBB stalling before unlock...\n"; sleep 3; |
|
172
|
0
|
|
|
|
|
0
|
truncate($fh,0); |
|
173
|
0
|
0
|
|
|
|
0
|
flock($fh, LOCK_UN) or die "flock UN: $!"; |
|
174
|
0
|
|
|
|
|
0
|
close $fh; |
|
175
|
0
|
|
|
|
|
0
|
$locked_fh = undef; |
|
176
|
|
|
|
|
|
|
#warn "$$ : ###BB0 unlocked and closed.\n"; |
|
177
|
|
|
|
|
|
|
} |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
# Libre Office text converter "charset" numbers |
|
180
|
|
|
|
|
|
|
my %LO_charsets = ( |
|
181
|
|
|
|
|
|
|
'WINDOWS1252' => 1, 'WINLATIN1' => 1, |
|
182
|
|
|
|
|
|
|
'APPLEWESTERN' => 2, |
|
183
|
|
|
|
|
|
|
'DOS/OS2437' => 3, |
|
184
|
|
|
|
|
|
|
'DOS/OS2850' => 4, |
|
185
|
|
|
|
|
|
|
'DOS/OS2860' => 5, |
|
186
|
|
|
|
|
|
|
'DOS/OS2861' => 6, |
|
187
|
|
|
|
|
|
|
'DOS/OS2863' => 7, |
|
188
|
|
|
|
|
|
|
'DOS/OS2865' => 8, |
|
189
|
|
|
|
|
|
|
'SYSTEM' => 9, 'SYSTEMDDEFAULT' => 9, |
|
190
|
|
|
|
|
|
|
'SYMBOL' => 10, |
|
191
|
|
|
|
|
|
|
'ASCII' => 11, |
|
192
|
|
|
|
|
|
|
'ISO88591' => 12, |
|
193
|
|
|
|
|
|
|
'ISO88592' => 13, |
|
194
|
|
|
|
|
|
|
'ISO88593' => 14, |
|
195
|
|
|
|
|
|
|
'ISO88594' => 15, |
|
196
|
|
|
|
|
|
|
'ISO88595' => 16, |
|
197
|
|
|
|
|
|
|
'ISO88596' => 17, |
|
198
|
|
|
|
|
|
|
'ISO88597' => 18, |
|
199
|
|
|
|
|
|
|
'ISO88598' => 19, |
|
200
|
|
|
|
|
|
|
'ISO88599' => 20, |
|
201
|
|
|
|
|
|
|
'ISO885914' => 21, |
|
202
|
|
|
|
|
|
|
'ISO885915' => 22, |
|
203
|
|
|
|
|
|
|
'OS2737' => 23, |
|
204
|
|
|
|
|
|
|
'OS2775' => 24, |
|
205
|
|
|
|
|
|
|
'OS2852' => 25, |
|
206
|
|
|
|
|
|
|
'OS2855' => 26, |
|
207
|
|
|
|
|
|
|
'OS2857' => 27, |
|
208
|
|
|
|
|
|
|
'OS2862' => 28, |
|
209
|
|
|
|
|
|
|
'OS2864' => 29, |
|
210
|
|
|
|
|
|
|
'OS2866' => 30, |
|
211
|
|
|
|
|
|
|
'OS2869' => 31, |
|
212
|
|
|
|
|
|
|
'WINDOWS874' => 32, |
|
213
|
|
|
|
|
|
|
'WINDOWS1250' => 33, 'WINLATIN2' => 33, |
|
214
|
|
|
|
|
|
|
'WINDOWS1251' => 34, |
|
215
|
|
|
|
|
|
|
'WINDOWS1253' => 35, |
|
216
|
|
|
|
|
|
|
'WINDOWS1254' => 36, |
|
217
|
|
|
|
|
|
|
'WINDOWS1255' => 37, |
|
218
|
|
|
|
|
|
|
'WINDOWS1256' => 38, |
|
219
|
|
|
|
|
|
|
'WINDOWS1257' => 39, |
|
220
|
|
|
|
|
|
|
'WINDOWS1258' => 40, |
|
221
|
|
|
|
|
|
|
'APPLEARABIC' => 41, |
|
222
|
|
|
|
|
|
|
'APPLECENTRALEUROPEAN' => 42, |
|
223
|
|
|
|
|
|
|
'APPLECROATIAN' => 43, |
|
224
|
|
|
|
|
|
|
'APPLECYRILLIC' => 44, |
|
225
|
|
|
|
|
|
|
'APPLEDEVANAGARI' => 45, |
|
226
|
|
|
|
|
|
|
'APPLEFARSI' => 46, |
|
227
|
|
|
|
|
|
|
'APPLEGREEK' => 47, |
|
228
|
|
|
|
|
|
|
'APPLEGUJARATI' => 48, |
|
229
|
|
|
|
|
|
|
'APPLEGURMUKHI' => 49, |
|
230
|
|
|
|
|
|
|
'APPLEHEBREW' => 50, |
|
231
|
|
|
|
|
|
|
'APPLEICELANDIC' => 51, |
|
232
|
|
|
|
|
|
|
'APPLEROMANIAN' => 52, |
|
233
|
|
|
|
|
|
|
'APPLETHAI' => 53, |
|
234
|
|
|
|
|
|
|
'APPLETURKISH' => 54, |
|
235
|
|
|
|
|
|
|
'APPLEUKRAINIAN' => 55, |
|
236
|
|
|
|
|
|
|
'APPLECHINESESIMPLIFIED' => 56, |
|
237
|
|
|
|
|
|
|
'APPLECHINESETRADITIONAL' => 57, |
|
238
|
|
|
|
|
|
|
'APPLEJAPANESE' => 58, |
|
239
|
|
|
|
|
|
|
'APPLEKOREAN' => 59, |
|
240
|
|
|
|
|
|
|
'WINDOWS932' => 60, |
|
241
|
|
|
|
|
|
|
'WINDOWS936' => 61, |
|
242
|
|
|
|
|
|
|
'WINDOWSWANSUNG949' => 62, |
|
243
|
|
|
|
|
|
|
'WINDOWS950' => 63, |
|
244
|
|
|
|
|
|
|
'SHIFTJIS' => 64, |
|
245
|
|
|
|
|
|
|
'GB2312' => 65, |
|
246
|
|
|
|
|
|
|
'GBT12345' => 66, |
|
247
|
|
|
|
|
|
|
'GBK' => 67, 'GB231280' => 67, |
|
248
|
|
|
|
|
|
|
'BIG5' => 68, |
|
249
|
|
|
|
|
|
|
'EUCJP' => 69, |
|
250
|
|
|
|
|
|
|
'EUCCN' => 70, |
|
251
|
|
|
|
|
|
|
'EUCTW' => 71, |
|
252
|
|
|
|
|
|
|
'ISO2022JP' => 72, |
|
253
|
|
|
|
|
|
|
'ISO2022CN' => 73, |
|
254
|
|
|
|
|
|
|
'KOI8R' => 74, |
|
255
|
|
|
|
|
|
|
'UTF7' => 75, |
|
256
|
|
|
|
|
|
|
'UTF8' => 76, |
|
257
|
|
|
|
|
|
|
'ISO885910' => 77, |
|
258
|
|
|
|
|
|
|
'ISO885913' => 78, |
|
259
|
|
|
|
|
|
|
'EUCKR' => 79, |
|
260
|
|
|
|
|
|
|
'ISO2022KR' => 80, |
|
261
|
|
|
|
|
|
|
'JIS0201' => 81, |
|
262
|
|
|
|
|
|
|
'JIS0208' => 82, |
|
263
|
|
|
|
|
|
|
'JIS0212' => 83, |
|
264
|
|
|
|
|
|
|
'WINDOWSJOHAB1361' => 84, |
|
265
|
|
|
|
|
|
|
'GB18030' => 85, |
|
266
|
|
|
|
|
|
|
'BIG5HKSCS' => 86, |
|
267
|
|
|
|
|
|
|
'TIS620' => 87, |
|
268
|
|
|
|
|
|
|
'KOI8U' => 88, |
|
269
|
|
|
|
|
|
|
'ISCIIDEVANAGARI' => 89, |
|
270
|
|
|
|
|
|
|
'JAVAUTF8' => 90, |
|
271
|
|
|
|
|
|
|
'ADOBESTANDARD' => 91, |
|
272
|
|
|
|
|
|
|
'ADOBESYMBOL' => 92, |
|
273
|
|
|
|
|
|
|
'PT154' => 93, |
|
274
|
|
|
|
|
|
|
'UCS4' => 65534, |
|
275
|
|
|
|
|
|
|
'UCS2' => 65535, |
|
276
|
|
|
|
|
|
|
); |
|
277
|
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=for Pod::Coverage _name2LOcharsetnum |
|
279
|
|
|
|
|
|
|
=cut |
|
280
|
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
sub _name2LOcharsetnum($) { |
|
282
|
0
|
|
|
0
|
|
0
|
my ($enc) = @_; |
|
283
|
0
|
|
|
|
|
0
|
local $_ = uc $enc; |
|
284
|
0
|
|
|
|
|
0
|
while (! $LO_charsets{$_}) { |
|
285
|
|
|
|
|
|
|
# successively remove - and other special characters |
|
286
|
0
|
0
|
|
|
|
0
|
s/\W//a or confess "LO charset: Unknown encoding name '$enc'"; |
|
287
|
|
|
|
|
|
|
} |
|
288
|
0
|
|
|
|
|
0
|
$LO_charsets{$_} |
|
289
|
|
|
|
|
|
|
} |
|
290
|
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
# convert between 0-based index and spreadsheet column letter code. |
|
292
|
|
|
|
|
|
|
# Default argument is $_ |
|
293
|
|
|
|
|
|
|
sub cx2let(_) { |
|
294
|
0
|
|
|
0
|
1
|
0
|
my $cx = shift; |
|
295
|
0
|
|
|
|
|
0
|
my $ABC="A"; ++$ABC for (1..$cx); |
|
|
0
|
|
|
|
|
0
|
|
|
296
|
0
|
|
|
|
|
0
|
return $ABC |
|
297
|
|
|
|
|
|
|
} |
|
298
|
|
|
|
|
|
|
sub let2cx(_) { |
|
299
|
0
|
|
|
0
|
1
|
0
|
my $ABC = shift; |
|
300
|
0
|
|
|
|
|
0
|
my $n = ord(substr($ABC,0,1,"")) - ord('A'); |
|
301
|
0
|
|
|
|
|
0
|
while (length $ABC) { |
|
302
|
0
|
|
|
|
|
0
|
my $letter = substr($ABC,0,1,""); |
|
303
|
0
|
|
|
|
|
0
|
$n = (($n+1) * 26) + (ord($letter) - ord('A')); |
|
304
|
|
|
|
|
|
|
} |
|
305
|
0
|
|
|
|
|
0
|
return $n; |
|
306
|
|
|
|
|
|
|
} |
|
307
|
|
|
|
|
|
|
sub cxrx2sheetaddr($$) { # (1,99) -> "B100" |
|
308
|
0
|
|
|
0
|
0
|
0
|
my ($cx, $rx) = @_; |
|
309
|
0
|
|
|
|
|
0
|
return cx2let($cx) . ($rx + 1); |
|
310
|
|
|
|
|
|
|
} |
|
311
|
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
=for Pod::Coverage cxrx2sheetaddr oops btw |
|
313
|
|
|
|
|
|
|
=cut |
|
314
|
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
our @sane_CSV_read_options = ( |
|
316
|
|
|
|
|
|
|
# Text::CSV pod says to not specify 'eol' to allow embedded newlines, |
|
317
|
|
|
|
|
|
|
# and to automatically handle "\n", "\r", or "\r\n". |
|
318
|
|
|
|
|
|
|
#eol => $/, |
|
319
|
|
|
|
|
|
|
binary => 1, # Allow reading embedded newlines & unicode etc. |
|
320
|
|
|
|
|
|
|
sep_char => ",", |
|
321
|
|
|
|
|
|
|
quote_char => '"', |
|
322
|
|
|
|
|
|
|
escape_char => '"', # Embedded "s appear as "" |
|
323
|
|
|
|
|
|
|
allow_whitespace => 0, # Preserve leading & trailing white space |
|
324
|
|
|
|
|
|
|
auto_diag => 2, # die on errors |
|
325
|
|
|
|
|
|
|
); |
|
326
|
|
|
|
|
|
|
our @sane_CSV_write_options = ( |
|
327
|
|
|
|
|
|
|
eol => $/, # Necessary when WRITING csv files |
|
328
|
|
|
|
|
|
|
binary => 1, |
|
329
|
|
|
|
|
|
|
sep_char => ",", |
|
330
|
|
|
|
|
|
|
quote_char => '"', |
|
331
|
|
|
|
|
|
|
escape_char => '"', # Embedded "s appear as "" |
|
332
|
|
|
|
|
|
|
allow_whitespace => 0, # Preserve leading & trailing white space |
|
333
|
|
|
|
|
|
|
auto_diag => 2, # die on errors |
|
334
|
|
|
|
|
|
|
); |
|
335
|
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
my %Saved_Sigs; |
|
337
|
|
|
|
|
|
|
sub _sighandler { |
|
338
|
0
|
0
|
0
|
0
|
|
0
|
if (! $Saved_Sigs{$_[0]} or $Saved_Sigs{$_[0]} eq 'DEFAULT') { |
|
339
|
|
|
|
|
|
|
# The user isn't catching this, so the process will abort without |
|
340
|
|
|
|
|
|
|
# running destructors: Call exit instead |
|
341
|
0
|
|
|
|
|
0
|
warn "($$)".__PACKAGE__." caught signal $_[0], exiting\n"; |
|
342
|
0
|
|
|
|
|
0
|
Carp::cluck "($$)".__PACKAGE__." caught signal $_[0], exiting\n"; |
|
343
|
0
|
|
|
|
|
0
|
exit 1; |
|
344
|
|
|
|
|
|
|
} |
|
345
|
0
|
|
|
|
|
0
|
$SIG{$_[0]} = $Saved_Sigs{$_[0]}; |
|
346
|
0
|
|
|
|
|
0
|
kill $_[0], $$; |
|
347
|
0
|
|
|
|
|
0
|
oops "Default (or user-defined) sig $_[0] action was to ignore!"; |
|
348
|
|
|
|
|
|
|
} |
|
349
|
|
|
|
|
|
|
sub _signals_guard() { |
|
350
|
0
|
|
0
|
0
|
|
0
|
%Saved_Sigs = ( map{ ($_ => ($SIG{$_} // undef)) } qw/HUP INT QUIT TERM/ ); |
|
|
0
|
|
|
|
|
0
|
|
|
351
|
0
|
|
|
|
|
0
|
$SIG{HUP} = $SIG{INT} = $SIG{QUIT} = $SIG{TERM} = \&_sighandler; |
|
352
|
0
|
|
|
0
|
|
0
|
return guard { @SIG{keys %Saved_Sigs} = (values %Saved_Sigs) } |
|
353
|
0
|
|
|
|
|
0
|
} |
|
354
|
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
# Create a probably-unique fingerprint for a particular file |
|
356
|
|
|
|
|
|
|
sub _file_fingerprint($) { |
|
357
|
0
|
|
|
0
|
|
0
|
my $path = shift; |
|
358
|
0
|
|
|
|
|
0
|
my $ctx = Digest::MD5->new; |
|
359
|
0
|
|
|
|
|
0
|
$ctx->add($_) for((stat($path))[0,1,9]); # dev,ino,mtime |
|
360
|
0
|
|
|
|
|
0
|
substr($ctx->b64digest,0,6) |
|
361
|
|
|
|
|
|
|
} |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
# Find LibreOffice, or failing that OpenOffice |
|
364
|
|
|
|
|
|
|
our $OLpath_answer = $ENV{SPREADSHEET_EDIT_LOPATH}; |
|
365
|
|
|
|
|
|
|
sub openlibreoffice_path() { |
|
366
|
|
|
|
|
|
|
return $OLpath_answer if $OLpath_answer; |
|
367
|
|
|
|
|
|
|
unless ($ENV{SPREADSHEET_EDIT_IGNPATH}) { |
|
368
|
|
|
|
|
|
|
foreach my $short_name (qw(libreoffice loffice localc)) { |
|
369
|
|
|
|
|
|
|
if ($OLpath_answer = which($short_name)) { |
|
370
|
|
|
|
|
|
|
return( ($OLpath_answer=path($OLpath_answer)->canonpath) ); |
|
371
|
|
|
|
|
|
|
} |
|
372
|
|
|
|
|
|
|
} |
|
373
|
|
|
|
|
|
|
} |
|
374
|
|
|
|
|
|
|
# Search for an installation. On Windows it will usually be |
|
375
|
|
|
|
|
|
|
# C:\Program Files\LibreOffice\... |
|
376
|
|
|
|
|
|
|
# On *nix, a local/isolated install (i.e. the result of extracting files |
|
377
|
|
|
|
|
|
|
# from a .deb or other archive) will be |
|
378
|
|
|
|
|
|
|
# somwehere/opt/libreofficeA.B/... |
|
379
|
|
|
|
|
|
|
# and "..." is the same standard hierarchy on all platforms. |
|
380
|
|
|
|
|
|
|
# |
|
381
|
|
|
|
|
|
|
# If multiple are found try to pick the "latest". |
|
382
|
|
|
|
|
|
|
my sub _cmp_subpaths($$) { |
|
383
|
|
|
|
|
|
|
my ($sp1, $sp2) = @_; |
|
384
|
|
|
|
|
|
|
oops if !defined($sp1); |
|
385
|
|
|
|
|
|
|
return 1 if !defined($sp2); |
|
386
|
|
|
|
|
|
|
# Use longest version in the (sub-)path, e.g. "4.4.1/opt/openoffice4/..." |
|
387
|
|
|
|
|
|
|
my (@v1) = sort { length($a) <=> length($b) } ($sp1 =~ /(\d[.\da-z]*)/ag); |
|
388
|
|
|
|
|
|
|
my (@v2) = sort { length($a) <=> length($b) } ($sp2 =~ /(\d[.\da-z]*)/ag); |
|
389
|
|
|
|
|
|
|
my $v1 = $v1[-1]//0; |
|
390
|
|
|
|
|
|
|
my $v2 = $v2[-1]//0; |
|
391
|
|
|
|
|
|
|
if ($v1 =~ s/alpha/0/) { |
|
392
|
|
|
|
|
|
|
return -1 unless $v2 =~ s/alpha/0/; |
|
393
|
|
|
|
|
|
|
} |
|
394
|
|
|
|
|
|
|
if ($v2 =~ /alpha/) { |
|
395
|
|
|
|
|
|
|
return +1 |
|
396
|
|
|
|
|
|
|
} |
|
397
|
|
|
|
|
|
|
version->parse($v1) <=> version->parse($v2) |
|
398
|
|
|
|
|
|
|
} |
|
399
|
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
# I tried just doing File::Glob::bsd_glob('/*/*/*/opt/libre*/program') but |
|
401
|
|
|
|
|
|
|
# it silently failed even though the same glob works from the shell. Mmff... |
|
402
|
3
|
|
|
3
|
|
28
|
no warnings FATAL => 'all'; |
|
|
3
|
|
|
|
|
8
|
|
|
|
3
|
|
|
|
|
38922
|
|
|
403
|
|
|
|
|
|
|
state $is_MSWin = ($^O eq "MSWin32"); |
|
404
|
|
|
|
|
|
|
my (@search_dirs, $searchfor_re, $maxdepth); |
|
405
|
|
|
|
|
|
|
if ($is_MSWin) { |
|
406
|
|
|
|
|
|
|
@search_dirs = ("C:\\Program Files","C:\\Program Files (x86)"); |
|
407
|
|
|
|
|
|
|
$searchfor_re = qr/^Program Files/; |
|
408
|
|
|
|
|
|
|
$maxdepth = 1; |
|
409
|
|
|
|
|
|
|
# depth: C:\Program Files\libreofficeXXX/program/ |
|
410
|
|
|
|
|
|
|
# 1 |
|
411
|
|
|
|
|
|
|
} else { |
|
412
|
|
|
|
|
|
|
@search_dirs = (File::Spec->rootdir()); |
|
413
|
|
|
|
|
|
|
push @search_dirs, $ENV{HOME} if $ENV{HOME}; |
|
414
|
|
|
|
|
|
|
$maxdepth = 4; |
|
415
|
|
|
|
|
|
|
$searchfor_re = qr/^opt$/; |
|
416
|
|
|
|
|
|
|
# depth: /*/*/<unpackparent>/opt/libreofficeXXX/program/ |
|
417
|
|
|
|
|
|
|
# 1 2 3 4 |
|
418
|
|
|
|
|
|
|
} |
|
419
|
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
my $debug = $ENV{SPREADSHEET_EDIT_FINDDEBUG}; |
|
421
|
|
|
|
|
|
|
my sub _Findvarsmsg() { |
|
422
|
|
|
|
|
|
|
if (u($_) eq u($File::Find::name) && u($_) eq u($File::Find::fullname)) { |
|
423
|
|
|
|
|
|
|
return qsh($_)."\n" |
|
424
|
|
|
|
|
|
|
} |
|
425
|
|
|
|
|
|
|
if (u($_) eq u($File::Find::name)) { |
|
426
|
|
|
|
|
|
|
return "\$_=name=".qsh($_)." -> ".qsh($File::Find::fullname)."\n"; |
|
427
|
|
|
|
|
|
|
} |
|
428
|
|
|
|
|
|
|
"\$_=".qsh($_)." name=".qsh($File::Find::name)." fullname=".qsh($File::Find::fullname)."\n"; |
|
429
|
|
|
|
|
|
|
} |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
my %results; |
|
432
|
|
|
|
|
|
|
$ENV{SPREADSHEET_EDIT_NOLOSEARCH} or |
|
433
|
|
|
|
|
|
|
File::Find::find( |
|
434
|
|
|
|
|
|
|
{ wanted => sub{ |
|
435
|
|
|
|
|
|
|
# Undef fullname OR invalid "_" filehandle implies a broken symlink, |
|
436
|
|
|
|
|
|
|
# see https://github.com/Perl/perl5/issues/21122 |
|
437
|
|
|
|
|
|
|
# Zero size on *nix implies /proc or something similar; do not enter. |
|
438
|
|
|
|
|
|
|
# File::Find::fullname unreadable implies followed link to inaccessable |
|
439
|
|
|
|
|
|
|
# (The initial "_" stat may be invalid, so "-l _" is useless) |
|
440
|
4865766
|
|
|
4865766
|
|
16926603
|
$! = 0; |
|
441
|
|
|
|
|
|
|
# https://github.com/Perl/perl5/issues/21143 |
|
442
|
4865766
|
|
|
|
|
7671804
|
my $fullname = $File::Find::fullname; |
|
443
|
4865766
|
50
|
33
|
|
|
13641099
|
if (!defined($fullname) && $is_MSWin) { |
|
444
|
0
|
0
|
|
|
|
0
|
warn "# _ MSWin undef fullname! ",_Findvarsmsg() if $debug; |
|
445
|
0
|
|
|
|
|
0
|
stat($_); # lstat was not done. Grr... |
|
446
|
0
|
|
|
|
|
0
|
$fullname = $File::Find::name; |
|
447
|
0
|
0
|
|
|
|
0
|
unless (-d _) { |
|
448
|
0
|
|
|
|
|
0
|
$File::Find::prune = 1; # in case it really is a dir |
|
449
|
0
|
|
|
|
|
0
|
return; |
|
450
|
|
|
|
|
|
|
} |
|
451
|
|
|
|
|
|
|
} else { |
|
452
|
4865766
|
100
|
100
|
|
|
16813158
|
unless (-d _ or -l _) { |
|
453
|
4487594
|
100
|
|
|
|
10388015
|
warn "# _ notdir/symlink: ",_Findvarsmsg() if $debug; |
|
454
|
4487594
|
|
|
|
|
54486706
|
$File::Find::prune = 1; # in case it really is |
|
455
|
4487594
|
|
|
|
|
116840567
|
return; |
|
456
|
|
|
|
|
|
|
} |
|
457
|
|
|
|
|
|
|
} |
|
458
|
378172
|
100
|
33
|
|
|
14702367
|
if ( |
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
459
|
|
|
|
|
|
|
!defined($fullname) # broken link, per docs |
|
460
|
|
|
|
|
|
|
|| (! -r _) || (! -x _) # unreadable item or invalid "_" handle |
|
461
|
|
|
|
|
|
|
# https://github.com/Perl/perl5/issues/21122 |
|
462
|
|
|
|
|
|
|
|| (!$is_MSWin && (stat(_))[7] == 0) # zero size ==> /proc etc. |
|
463
|
|
|
|
|
|
|
|| /\$/ # $some_windows_special_thing$ |
|
464
|
|
|
|
|
|
|
|| ! -r $fullname # presumably a symlink to unreadable |
|
465
|
|
|
|
|
|
|
|| ! -x _ # or unsearchable dir |
|
466
|
|
|
|
|
|
|
|| m#^/snap/(?!.*ffice)# # snap other than e.g. /snap/libreoffice |
|
467
|
|
|
|
|
|
|
|| m#^/(proc|dev|sys|tmp|boot|run|lost+found|usr/(include|src))$# |
|
468
|
|
|
|
|
|
|
) { |
|
469
|
646
|
100
|
|
|
|
2310
|
warn "# PRUNING ",_Findvarsmsg() if $debug; |
|
470
|
646
|
|
|
|
|
8617
|
$File::Find::prune = 1; |
|
471
|
|
|
|
|
|
|
return |
|
472
|
646
|
|
|
|
|
35719
|
} |
|
473
|
377526
|
100
|
|
|
|
1707461
|
warn "# DIR: ",_Findvarsmsg() if $debug; |
|
474
|
|
|
|
|
|
|
# Maximum depth: /*/*/<unpackparent>/opt/libreofficeXXX/program/ |
|
475
|
377526
|
|
|
|
|
5659849
|
my $path = path($_); |
|
476
|
377526
|
|
|
|
|
14331437
|
my $depth = scalar(() = $path->stringify =~ m#(/)#g); |
|
477
|
377526
|
100
|
|
|
|
14159501
|
if (basename($_) =~ $searchfor_re) { # ^opt$ or ^Program Files |
|
478
|
6
|
|
|
|
|
24
|
my $prefix = path($_)->parent->parent; |
|
479
|
6
|
|
|
|
|
1095
|
for my $o_l (qw/libre open/) { |
|
480
|
12
|
|
|
|
|
73
|
my $pattern |
|
481
|
|
|
|
|
|
|
= path($_)->child("${o_l}*/program/soffice*")->canonpath; |
|
482
|
|
|
|
|
|
|
# eval because I'm suspicious of the glob on Windows |
|
483
|
12
|
|
|
|
|
914
|
my @hits; eval{ @hits = sort +bsd_glob($pattern, GLOB_NOCASE) }; |
|
|
12
|
|
|
|
|
27
|
|
|
|
12
|
|
|
|
|
573
|
|
|
484
|
12
|
50
|
|
|
|
61
|
if (@hits) { |
|
485
|
|
|
|
|
|
|
# On windows, use soffice.com not .exe because it writes messages |
|
486
|
|
|
|
|
|
|
# to stdout not a window. See https://help.libreoffice.org/7.5/en-GB/text/shared/guide/start_parameters.html?&DbPAR=SHARED&System=WIN |
|
487
|
|
|
|
|
|
|
my $path = (first{ /soffice\.com$/ } @hits) || |
|
488
|
0
|
|
0
|
|
|
0
|
(first{ /soffice$/ } @hits); |
|
489
|
0
|
0
|
|
|
|
0
|
if ($path) { |
|
490
|
0
|
0
|
|
|
|
0
|
$prefix->subsumes($path) or oops dvis '$prefix $path'; |
|
491
|
0
|
|
|
|
|
0
|
my $subpath = path($path)->relative($prefix); |
|
492
|
0
|
0
|
|
|
|
0
|
if (_cmp_subpaths($subpath, $results{$o_l}{subpath}) >= 0) { |
|
493
|
0
|
|
|
|
|
0
|
@{$results{$o_l}}{qw/path subpath/} = ($path, $subpath); |
|
|
0
|
|
|
|
|
0
|
|
|
494
|
|
|
|
|
|
|
# We found where installations are, don't look deeper |
|
495
|
0
|
|
|
|
|
0
|
$maxdepth = $depth; |
|
496
|
|
|
|
|
|
|
} |
|
497
|
|
|
|
|
|
|
} |
|
498
|
|
|
|
|
|
|
} |
|
499
|
12
|
50
|
|
|
|
52
|
else { btw dvis '##glob failed: $pattern\n$@' if $@; } |
|
500
|
|
|
|
|
|
|
} |
|
501
|
|
|
|
|
|
|
} |
|
502
|
377526
|
100
|
|
|
|
36142670
|
if ($depth == $maxdepth) { |
|
|
|
50
|
|
|
|
|
|
|
503
|
2086
|
100
|
|
|
|
6066
|
warn "# pruning at maxdepth $depth ",qsh($_),"\n" if $debug; |
|
504
|
2086
|
|
|
|
|
20284
|
$File::Find::prune = 1; |
|
505
|
2086
|
|
|
|
|
46991
|
return; |
|
506
|
|
|
|
|
|
|
} |
|
507
|
0
|
|
|
|
|
0
|
elsif ($depth > $maxdepth) { oops dvis '$depth $maxdepth $_' } |
|
508
|
|
|
|
|
|
|
}, |
|
509
|
|
|
|
|
|
|
follow_fast => 1, |
|
510
|
|
|
|
|
|
|
follow_skip => 2, |
|
511
|
|
|
|
|
|
|
dangling_symlinks => 0, |
|
512
|
|
|
|
|
|
|
no_chdir => 1 |
|
513
|
|
|
|
|
|
|
}, |
|
514
|
|
|
|
|
|
|
@search_dirs |
|
515
|
|
|
|
|
|
|
); |
|
516
|
|
|
|
|
|
|
$OLpath_answer = path( |
|
517
|
|
|
|
|
|
|
$results{libre}{path} // $results{open}{path} |
|
518
|
|
|
|
|
|
|
|| (!$ENV{SPREADSHEET_EDIT_IGNPATH} && which("soffice")) # installed OO? |
|
519
|
|
|
|
|
|
|
|| return(undef) |
|
520
|
|
|
|
|
|
|
)->realpath->canonpath |
|
521
|
|
|
|
|
|
|
}#openlibreoffice_path |
|
522
|
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
sub _openlibre_features() { |
|
524
|
1
|
|
|
1
|
|
4
|
state $hash; |
|
525
|
1
|
50
|
|
|
|
8
|
return $hash if defined $hash; |
|
526
|
1
|
|
50
|
|
|
19
|
my $prog = openlibreoffice_path() // return(($hash={ available => 0 })); |
|
527
|
0
|
|
|
|
|
0
|
my $raw_version; |
|
528
|
|
|
|
|
|
|
# This is gross but fast and works in recent versions of LO |
|
529
|
0
|
0
|
|
|
|
0
|
if (my $fh = eval{ path($prog)->realpath->parent->child("types/offapi.rdb") |
|
|
0
|
|
|
|
|
0
|
|
|
530
|
|
|
|
|
|
|
->filehandle("<",":raw")} ) { |
|
531
|
0
|
|
|
|
|
0
|
my $octets; sysread $fh, $octets, 100; |
|
|
0
|
|
|
|
|
0
|
|
|
532
|
0
|
0
|
|
|
|
0
|
if ($octets =~ /Created by LibreOffice (\d+\.\d+\.\w+)/) { |
|
533
|
0
|
|
|
|
|
0
|
$raw_version = $1; |
|
534
|
|
|
|
|
|
|
} |
|
535
|
|
|
|
|
|
|
} |
|
536
|
0
|
0
|
|
|
|
0
|
unless ($raw_version) { |
|
537
|
0
|
0
|
|
|
|
0
|
if (qx/$prog --version 2>&1/ =~ /Libre.*? (\d+\.\d+\.\w+)/) { |
|
538
|
0
|
|
|
|
|
0
|
$raw_version = $1; |
|
539
|
|
|
|
|
|
|
} else { |
|
540
|
0
|
|
|
|
|
0
|
warn "$prog --version DID NOT WORK\n"; |
|
541
|
|
|
|
|
|
|
} |
|
542
|
|
|
|
|
|
|
} |
|
543
|
0
|
0
|
|
|
|
0
|
unless ($raw_version) { |
|
544
|
0
|
|
|
|
|
0
|
warn "WARNING: Could not determine version of $prog\n"; |
|
545
|
0
|
|
|
|
|
0
|
$raw_version = "999.01"; |
|
546
|
|
|
|
|
|
|
} |
|
547
|
0
|
|
|
|
|
0
|
my $version = version->parse("v$raw_version"); |
|
548
|
0
|
|
|
|
|
0
|
$hash = { |
|
549
|
|
|
|
|
|
|
available => 1, |
|
550
|
|
|
|
|
|
|
# LibreOffice 7.2 allows extracting all sheets at once |
|
551
|
|
|
|
|
|
|
allsheets => ($version >= version->parse("v7.2")), |
|
552
|
|
|
|
|
|
|
# ...but not yet extracting a single sheet by name. |
|
553
|
|
|
|
|
|
|
# https://bugs.documentfoundation.org/show_bug.cgi?id=135762#c24 |
|
554
|
|
|
|
|
|
|
named_sheet => 0, |
|
555
|
|
|
|
|
|
|
# Supported output formats are too many to list |
|
556
|
|
|
|
|
|
|
ousuf_any => 1, |
|
557
|
|
|
|
|
|
|
raw_version => $raw_version, version => "$version", |
|
558
|
|
|
|
|
|
|
} |
|
559
|
|
|
|
|
|
|
} |
|
560
|
|
|
|
|
|
|
|
|
561
|
0
|
|
|
0
|
|
0
|
sub _openlibre_supports_allsheets() { _openlibre_features()->{allsheets} } |
|
562
|
0
|
|
|
0
|
|
0
|
sub _openlibre_supports_named_sheet() { _openlibre_features()->{named_sheet} } |
|
563
|
0
|
|
|
0
|
|
0
|
sub _openlibre_supports_writing($) { _openlibre_features()->{available} } |
|
564
|
|
|
|
|
|
|
|
|
565
|
1
|
|
|
1
|
|
279
|
sub _ssconvert_features() { return { availble => 0 } } # TODO add back? |
|
566
|
0
|
|
|
0
|
|
0
|
sub _ssconvert_supports_allsheets() { _ssconvert_features()->{allsheets} } |
|
567
|
0
|
|
|
0
|
|
0
|
sub _ssconvert_supports_named_sheet() { _ssconvert_features()->{named_sheet} } |
|
568
|
0
|
|
|
0
|
|
0
|
sub _ssconvert_supports_writing($) { _ssconvert_features()->{available} } |
|
569
|
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
# These allow users (e.g. App-diff_spreadsheets tests) to determine |
|
571
|
|
|
|
|
|
|
# if external tool(s) are available to convert between spreadsheet formats |
|
572
|
|
|
|
|
|
|
# or to/from csv (CSVs are supported directly so can always be used) |
|
573
|
|
|
|
|
|
|
# Currently used by t/io.pl to skip tests |
|
574
|
|
|
|
|
|
|
sub can_cvt_spreadsheets() { |
|
575
|
|
|
|
|
|
|
_openlibre_features()->{available} || _ssconvert_features()->{availble} |
|
576
|
1
|
50
|
|
1
|
1
|
2523806
|
} |
|
577
|
|
|
|
|
|
|
sub can_extract_allsheets() { |
|
578
|
0
|
0
|
|
0
|
1
|
0
|
_openlibre_supports_allsheets() || _ssconvert_supports_allsheets() |
|
579
|
|
|
|
|
|
|
} |
|
580
|
|
|
|
|
|
|
sub can_extract_named_sheet() { |
|
581
|
0
|
0
|
0
|
0
|
1
|
0
|
can_extract_allsheets() # used to emulate |
|
582
|
|
|
|
|
|
|
|| _openlibre_supports_named_sheet() || _ssconvert_supports_named_sheet() |
|
583
|
|
|
|
|
|
|
} |
|
584
|
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
=for Pod::Coverage can_cvt_spreadsheets can_extract_allsheets |
|
586
|
|
|
|
|
|
|
-for Pod::Coverage can_extract_named_sheet |
|
587
|
|
|
|
|
|
|
=cut |
|
588
|
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
sub _runcmd($@) { |
|
590
|
0
|
|
|
0
|
|
0
|
my ($opts, @cmd) = @_; |
|
591
|
0
|
|
|
|
|
0
|
my $guard = _signals_guard; |
|
592
|
|
|
|
|
|
|
# This used to fork & exec but that blows up on MSWin32 because the child |
|
593
|
|
|
|
|
|
|
# pseudo-process executes all END{} blocks everywhere after "exec" |
|
594
|
0
|
|
|
|
|
0
|
my $redirs = ""; |
|
595
|
0
|
0
|
|
|
|
0
|
if ($opts->{suppress_stderr}) { |
|
596
|
0
|
|
|
|
|
0
|
$redirs .= " 2>".devnull(); |
|
597
|
|
|
|
|
|
|
} |
|
598
|
0
|
0
|
|
|
|
0
|
if ($opts->{stdout_to_stderr}) { |
|
599
|
0
|
|
|
|
|
0
|
confess "Not portable"; |
|
600
|
0
|
|
|
|
|
0
|
$redirs .= " 1>&2"; |
|
601
|
|
|
|
|
|
|
} |
|
602
|
0
|
0
|
|
|
|
0
|
if ($opts->{stderr_to_stdout}) { |
|
603
|
0
|
|
|
|
|
0
|
confess "Not portable"; |
|
604
|
0
|
|
|
|
|
0
|
$redirs .= " 2>&1"; |
|
605
|
|
|
|
|
|
|
} |
|
606
|
0
|
0
|
|
|
|
0
|
if ($opts->{suppress_stdout}) { |
|
607
|
0
|
|
|
|
|
0
|
$redirs .= " >".devnull(); |
|
608
|
|
|
|
|
|
|
} |
|
609
|
0
|
|
|
|
|
0
|
my $cmdstr = join(" ", map{qsh} @cmd) . $redirs; |
|
|
0
|
|
|
|
|
0
|
|
|
610
|
0
|
0
|
|
|
|
0
|
if ($redirs) { |
|
611
|
0
|
|
|
|
|
0
|
foreach (@cmd) { |
|
612
|
0
|
0
|
|
|
|
0
|
confess "Can not portably pass argument '$_'" if /["']/; |
|
613
|
|
|
|
|
|
|
} |
|
614
|
0
|
0
|
|
|
|
0
|
warn "> $cmdstr\n" if $opts->{verbose}; |
|
615
|
0
|
|
|
|
|
0
|
system $cmdstr; |
|
616
|
|
|
|
|
|
|
} else { |
|
617
|
0
|
0
|
|
|
|
0
|
warn "> $cmdstr\n" if $opts->{verbose}; |
|
618
|
0
|
|
|
|
|
0
|
system @cmd; |
|
619
|
|
|
|
|
|
|
} |
|
620
|
0
|
|
|
|
|
0
|
return $? |
|
621
|
|
|
|
|
|
|
} |
|
622
|
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
sub _fmt_outpath_contents($) { |
|
624
|
0
|
|
0
|
0
|
|
0
|
my $outpath = $_[0]->{outpath} // oops; |
|
625
|
0
|
0
|
|
|
|
0
|
return "(outpath does not exist)" unless -e $outpath; |
|
626
|
0
|
0
|
|
|
|
0
|
return "(outpath is a file)" if -f $outpath; |
|
627
|
0
|
0
|
|
|
|
0
|
return "(outpath is a STRANGE OBJECT)" unless -d $outpath; |
|
628
|
|
|
|
|
|
|
"\n outpath contains: " |
|
629
|
0
|
|
|
|
|
0
|
.join(", ",map{qsh basename $_} path($outpath)->children); |
|
|
0
|
|
|
|
|
0
|
|
|
630
|
|
|
|
|
|
|
} |
|
631
|
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
my $tempdir; |
|
633
|
|
|
|
|
|
|
sub _create_tempdir_if_needed($) { |
|
634
|
2
|
|
|
2
|
|
3
|
my $opts = shift; |
|
635
|
|
|
|
|
|
|
# Keep a per-process persistent temp directory, deleted at process exit. |
|
636
|
|
|
|
|
|
|
# It contains result files when the user did not specify {outpath}, |
|
637
|
|
|
|
|
|
|
# plus a cache of as-yet unrequested sheet .csv files, used when the |
|
638
|
|
|
|
|
|
|
# external tool can only extract all sheets, not a single sheet by name: |
|
639
|
|
|
|
|
|
|
# |
|
640
|
|
|
|
|
|
|
# tempdir/ |
|
641
|
|
|
|
|
|
|
# <ifbase>_<sig>.xlsx etc. # single file returned to user |
|
642
|
|
|
|
|
|
|
# <ifbase>_<sig>/*.csv # directory returned to user |
|
643
|
|
|
|
|
|
|
# <ifbase>_<sig>_csvcache/*.csv |
|
644
|
|
|
|
|
|
|
# |
|
645
|
|
|
|
|
|
|
# <ifbase> is derived from the intput file name, and <sig> is a fingerprint |
|
646
|
|
|
|
|
|
|
# based on input file's dev, inode, and modification timestamp. |
|
647
|
|
|
|
|
|
|
# |
|
648
|
2
|
|
66
|
|
|
8
|
$tempdir //= do{ |
|
649
|
|
|
|
|
|
|
#(my $template = __PACKAGE__."_XXXXX") =~ s/::/-/g; |
|
650
|
|
|
|
|
|
|
#Path::Tiny->tempdir($template) |
|
651
|
1
|
|
|
|
|
3
|
my $pid = $$; |
|
652
|
1
|
|
|
|
|
4
|
my $user = _get_username(); |
|
653
|
1
|
|
|
|
|
10
|
(my $dname = __PACKAGE__."_${user}_${pid}_tempdir") =~ s/::/-/g; |
|
654
|
1
|
|
|
|
|
24
|
(my $path = path(File::Spec->tmpdir)->child($dname))->mkpath; |
|
655
|
1
|
|
|
|
|
380
|
$path |
|
656
|
|
|
|
|
|
|
}; |
|
657
|
|
|
|
|
|
|
} |
|
658
|
3
|
100
|
|
3
|
|
5743
|
END{ $tempdir->remove_tree if $tempdir; } |
|
659
|
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
# Compose a unique path under $tempdir. |
|
661
|
|
|
|
|
|
|
# This is *not* a "tempfile" or "tempdir" object which auto-destructs, |
|
662
|
|
|
|
|
|
|
# in fact it does not even exist yet and we don't know here which it will be. |
|
663
|
|
|
|
|
|
|
# Either the user must remove it when they are done with it, or it will |
|
664
|
|
|
|
|
|
|
# be removed when $tempdir is removed at process exit. |
|
665
|
|
|
|
|
|
|
# |
|
666
|
|
|
|
|
|
|
sub _path_under_tempdir($@) { |
|
667
|
0
|
|
|
0
|
|
0
|
my $opts = shift; |
|
668
|
|
|
|
|
|
|
my %args = ( |
|
669
|
0
|
|
|
|
|
0
|
words => [$opts->{ifbase}, $opts->{sheetname}], |
|
670
|
|
|
|
|
|
|
@_ |
|
671
|
|
|
|
|
|
|
); |
|
672
|
0
|
|
|
|
|
0
|
my $bname = join "_", grep{defined} @{$args{words}}; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
673
|
|
|
|
|
|
|
# Collisions occur when recursing to emulate Extract-by-name, |
|
674
|
|
|
|
|
|
|
# or if the user repeatedly reads the same thing, etc. |
|
675
|
0
|
|
|
|
|
0
|
state $seqnums = {}; |
|
676
|
0
|
0
|
|
|
|
0
|
if ($seqnums->{$bname}++) { |
|
677
|
0
|
|
|
|
|
0
|
$bname .= "_".$seqnums->{$bname}; # append unique sequence number |
|
678
|
|
|
|
|
|
|
} |
|
679
|
0
|
0
|
|
|
|
0
|
$bname .= ".$args{suf}" if $args{suf}; |
|
680
|
0
|
|
|
|
|
0
|
return $tempdir->child($bname); |
|
681
|
|
|
|
|
|
|
} |
|
682
|
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
# Compose csv cache subdir path |
|
684
|
|
|
|
|
|
|
sub _cachedir($) { |
|
685
|
0
|
|
|
0
|
|
0
|
my $opts = shift; |
|
686
|
0
|
|
|
|
|
0
|
_path_under_tempdir($opts,words => [$opts->{ifbase}, "csvcache"]); |
|
687
|
|
|
|
|
|
|
} |
|
688
|
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
## Copy an ephemeral temp file to a path under tempdir if needed |
|
690
|
|
|
|
|
|
|
#sub _make_file_permanent($$) { |
|
691
|
|
|
|
|
|
|
# my ($opts, $path) = @_; |
|
692
|
|
|
|
|
|
|
# if (eval{ $path->cached_temp }) { # didn't throw |
|
693
|
|
|
|
|
|
|
# my $suf = $path->basename =~ /\.(\w+)$/a ? $1 : undef; |
|
694
|
|
|
|
|
|
|
# my $newpath = _path_under_tempdir($opts, suf => $suf); |
|
695
|
|
|
|
|
|
|
# $path->move($newpath); |
|
696
|
|
|
|
|
|
|
# return $newpath |
|
697
|
|
|
|
|
|
|
# } else { |
|
698
|
|
|
|
|
|
|
# return $path |
|
699
|
|
|
|
|
|
|
# } |
|
700
|
|
|
|
|
|
|
#} |
|
701
|
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
sub _convert_using_openlibre($$$) { |
|
703
|
0
|
|
|
0
|
|
0
|
my ($opts, $src, $dst) = @_; |
|
704
|
0
|
0
|
|
0
|
|
0
|
oops unless all{ $opts->{$_} } qw/cvt_from cvt_to/; |
|
|
0
|
|
|
|
|
0
|
|
|
705
|
0
|
0
|
0
|
|
|
0
|
oops if $opts->{allsheets} && ! _openlibre_supports_allsheets(); |
|
706
|
0
|
0
|
0
|
|
|
0
|
oops if $opts->{sheetname} && ! _openlibre_supports_named_sheet(); |
|
707
|
0
|
|
|
|
|
0
|
my $debug = $opts->{debug}; |
|
708
|
|
|
|
|
|
|
|
|
709
|
0
|
|
0
|
|
|
0
|
my $prog = openlibreoffice_path() // oops; |
|
710
|
|
|
|
|
|
|
|
|
711
|
0
|
|
|
|
|
0
|
my $saved_UserInstallation = $ENV{UserInstallation}; |
|
712
|
|
|
|
|
|
|
# URI format is file://server/path where 'server' is empty. "file://path" is |
|
713
|
|
|
|
|
|
|
# "never correct, but is often used" en.wikipedia.org/wiki/File_URI_scheme |
|
714
|
|
|
|
|
|
|
# Correct examples: file::///tmp/something file:///C:/somewhere |
|
715
|
0
|
|
|
|
|
0
|
$ENV{UserInstallation} = URI::file->new(_get_tool_profdir($prog)->canonpath); |
|
716
|
0
|
0
|
|
|
|
0
|
warn "Temporarily set UserInstallation=$ENV{UserInstallation}\n" if $debug; |
|
717
|
|
|
|
|
|
|
scope_guard { |
|
718
|
0
|
0
|
|
0
|
|
0
|
if (defined $saved_UserInstallation) { |
|
719
|
0
|
|
|
|
|
0
|
$ENV{UserInstallation} = $saved_UserInstallation; |
|
720
|
|
|
|
|
|
|
} else { |
|
721
|
|
|
|
|
|
|
delete $ENV{UserInstallation} |
|
722
|
0
|
|
|
|
|
0
|
} |
|
723
|
0
|
|
|
|
|
0
|
}; |
|
724
|
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
# The --convert-to argument is "suffix:filtername:filteropts" |
|
726
|
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
# I think (not certain) that we can only specify the encoding of CSV files, |
|
728
|
|
|
|
|
|
|
# either as input or output; .xlsx and .ods spreadsheets (which are based |
|
729
|
|
|
|
|
|
|
# on XML) could in principle use any encoding internally, but I'm not sure |
|
730
|
|
|
|
|
|
|
# we can control that, nor should anyone ever need to. |
|
731
|
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
# REFERENCES: |
|
733
|
|
|
|
|
|
|
# https://help.libreoffice.org/7.5/en-US/text/shared/guide/start_parameters.html?&DbPAR=SHARED&System=UNIX |
|
734
|
|
|
|
|
|
|
# http://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options |
|
735
|
|
|
|
|
|
|
# https://wiki.documentfoundation.org/Documentation/DevGuide/Spreadsheet_Documents#Filter_Options_for_the_CSV_Filter |
|
736
|
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
# I think we never want to specify the filter unless we have parameters |
|
738
|
|
|
|
|
|
|
# for it. Currently that is only for csv. |
|
739
|
|
|
|
|
|
|
# If no filter is specified, the suffix (e.g. 'ods') should be enough |
|
740
|
0
|
|
|
|
|
0
|
state $suf2ofilter = { |
|
741
|
|
|
|
|
|
|
csv => "Text - txt - csv (StarCalc)", |
|
742
|
|
|
|
|
|
|
txt => "Text - txt - csv (StarCalc)", |
|
743
|
|
|
|
|
|
|
#xls => "MS Excel 97", |
|
744
|
|
|
|
|
|
|
#xlsx => "Calc MS Excel 2007 XML", |
|
745
|
|
|
|
|
|
|
#ods => "calc8", |
|
746
|
|
|
|
|
|
|
}; |
|
747
|
|
|
|
|
|
|
|
|
748
|
0
|
|
0
|
|
|
0
|
my $ifilter = $opts->{soffice_infilter} //= do{ |
|
749
|
0
|
0
|
|
|
|
0
|
if ($opts->{cvt_from} eq "csv") { |
|
750
|
0
|
0
|
|
|
|
0
|
my $filter_name = $suf2ofilter->{$opts->{cvt_from}} or oops; |
|
751
|
0
|
|
|
|
|
0
|
my $enc = $opts->{input_encoding}; |
|
752
|
0
|
|
|
|
|
0
|
my $charset = _name2LOcharsetnum($enc); # dies if unknown enc |
|
753
|
0
|
|
|
|
|
0
|
my $colformats = ""; |
|
754
|
0
|
0
|
|
|
|
0
|
if (my $cf = $opts->{col_formats}) { |
|
755
|
0
|
0
|
|
|
|
0
|
$cf = [split /[\/,]/, $cf] unless ref($cf); # fmtA/fmtB/... |
|
756
|
0
|
|
|
|
|
0
|
for (my $ix=0; $ix <= $#$cf; $ix++) { |
|
757
|
0
|
|
0
|
|
|
0
|
local $_ = $cf->[$ix] // 1; |
|
758
|
0
|
0
|
0
|
|
|
0
|
m#^([123459]|10)$# |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
759
|
|
|
|
|
|
|
|| s#^standard$#1#i |
|
760
|
|
|
|
|
|
|
|| s#^text$#2#i |
|
761
|
|
|
|
|
|
|
|| s#^M+/D+/Y+$#3#i |
|
762
|
|
|
|
|
|
|
|| s#^D+/M+/Y+$#4#i |
|
763
|
|
|
|
|
|
|
|| s#^Y+/M+/D+$#5#i |
|
764
|
|
|
|
|
|
|
|| s#^ignore$#9#i |
|
765
|
|
|
|
|
|
|
|| s#^US.*English$#10#i |
|
766
|
|
|
|
|
|
|
|| croak "Unknown format code '$_' in {col_formats}"; |
|
767
|
0
|
0
|
|
|
|
0
|
$colformats .= "/" if $colformats; |
|
768
|
0
|
|
|
|
|
0
|
$colformats .= ($ix+1)."/$_"; |
|
769
|
|
|
|
|
|
|
} |
|
770
|
|
|
|
|
|
|
} |
|
771
|
|
|
|
|
|
|
$filter_name.":" |
|
772
|
|
|
|
|
|
|
# Tokens 1-4: FldSep=',' TxtDelim='"' Charset FirstLineNum |
|
773
|
|
|
|
|
|
|
#. "44,34,$charset,1" |
|
774
|
|
|
|
|
|
|
. ord($opts->{sep_char}//",")."," |
|
775
|
0
|
|
0
|
|
|
0
|
. ord($opts->{quote_char}//'"')."," |
|
|
|
|
0
|
|
|
|
|
|
776
|
|
|
|
|
|
|
. "$charset,1" |
|
777
|
|
|
|
|
|
|
# Token 5: Cell format codes: |
|
778
|
|
|
|
|
|
|
# If variable-width cells (the norm): colnum/fmt/colnum/fmt... |
|
779
|
|
|
|
|
|
|
# colnum: 1-based column number |
|
780
|
|
|
|
|
|
|
# fmt: 1=Std 2=Text 3=MM/DD/YY 4=DD/MM/YY 5=YY/MM/DD 6-8 unused |
|
781
|
|
|
|
|
|
|
# 9=ignore field (do not import), |
|
782
|
|
|
|
|
|
|
# 10=US-English content (e.g. 3.14 not 3,14) |
|
783
|
|
|
|
|
|
|
# (I'm guessing 1=Std means use current lang [or per Tok 6?]) |
|
784
|
|
|
|
|
|
|
# If fixed-width cells... [something else] |
|
785
|
|
|
|
|
|
|
. ",$colformats" |
|
786
|
|
|
|
|
|
|
# Token 6: MS-LCID Language Id; 0 or omitted means UI language |
|
787
|
|
|
|
|
|
|
. "," # default: false |
|
788
|
|
|
|
|
|
|
# Token 7: On input: true => Quoted cells are always read a 'text', |
|
789
|
|
|
|
|
|
|
# effectively disabling Token 8. This must be false to recognize dates |
|
790
|
|
|
|
|
|
|
# like "Jan 1, 2000" which by necessity must be quoted for the comma, |
|
791
|
|
|
|
|
|
|
# but will **CORRUPT** zip codes with leading zeroes unless |
|
792
|
|
|
|
|
|
|
# col_formats overrides (which it does now by default). |
|
793
|
|
|
|
|
|
|
.",false" # default: false |
|
794
|
|
|
|
|
|
|
# Token 8: on input: "Detect Special Numbers", i.e. date or time values |
|
795
|
|
|
|
|
|
|
# in human form, numbers in scientific (expondntial) notation etc. |
|
796
|
|
|
|
|
|
|
# If false, ONLY decimal numbers (thousands separators ok). |
|
797
|
|
|
|
|
|
|
.",true" # default: false (for import) |
|
798
|
|
|
|
|
|
|
# Tokens 9-10: not used on import |
|
799
|
|
|
|
|
|
|
.",," |
|
800
|
|
|
|
|
|
|
# Token 11: Remove spaces; trim leading & trailing spaces when reading |
|
801
|
|
|
|
|
|
|
."," # default: false |
|
802
|
|
|
|
|
|
|
# Token 12: not use on import |
|
803
|
|
|
|
|
|
|
."," |
|
804
|
|
|
|
|
|
|
# Token 13: Import "=..." as formulas instead of text? |
|
805
|
|
|
|
|
|
|
."," # default: false i.e. do not recognize formulas |
|
806
|
|
|
|
|
|
|
# Token 14: "Automatically detected since LibreOffice 7.6" [BOM?] |
|
807
|
|
|
|
|
|
|
."," |
|
808
|
|
|
|
|
|
|
} |
|
809
|
|
|
|
|
|
|
else { |
|
810
|
|
|
|
|
|
|
undef |
|
811
|
0
|
|
|
|
|
0
|
} |
|
812
|
|
|
|
|
|
|
}; |
|
813
|
|
|
|
|
|
|
|
|
814
|
0
|
|
0
|
|
|
0
|
my $ofilter = $opts->{soffice_outfilter} //= do{ |
|
815
|
|
|
|
|
|
|
# OutputFilterName[:paramtoken,paramtoken,...] |
|
816
|
0
|
0
|
|
|
|
0
|
if ($opts->{cvt_to} eq "csv") { |
|
817
|
0
|
0
|
|
|
|
0
|
my $filter_name = $suf2ofilter->{$opts->{cvt_to}} or oops; |
|
818
|
0
|
|
|
|
|
0
|
my $enc = $opts->{output_encoding}; |
|
819
|
0
|
|
|
|
|
0
|
my $charset = _name2LOcharsetnum($enc); # dies if unknown enc |
|
820
|
|
|
|
|
|
|
$filter_name.":" |
|
821
|
|
|
|
|
|
|
# Tokens 1-4: FldSep=, TxtDelim=" Charset FirstLineNum |
|
822
|
|
|
|
|
|
|
#."44,34,$charset,1" |
|
823
|
|
|
|
|
|
|
. ord($opts->{sep_char}//",")."," |
|
824
|
|
|
|
|
|
|
. ord($opts->{quote_char}//'"')."," |
|
825
|
|
|
|
|
|
|
. "$charset,1" |
|
826
|
|
|
|
|
|
|
# Token 5: Cell format codes. Only used for import? (see above) |
|
827
|
|
|
|
|
|
|
# What about fixed-width? |
|
828
|
|
|
|
|
|
|
."," |
|
829
|
|
|
|
|
|
|
# Token 6: Language identifier (uses Microsoft lang ids) |
|
830
|
|
|
|
|
|
|
# 1033 means US-English (omitted => use UI's language) |
|
831
|
|
|
|
|
|
|
."," |
|
832
|
|
|
|
|
|
|
# Token 7: QuoteAllTextCells |
|
833
|
|
|
|
|
|
|
# *** true will "quote" even single-bareword cells, which looks |
|
834
|
|
|
|
|
|
|
# *** bad and makes t/ tests messier, but preserves information |
|
835
|
|
|
|
|
|
|
# *** that such cells were not numbers or dates, etc. This ensures |
|
836
|
|
|
|
|
|
|
# *** that Zip codes, etc. with leading zeroes won't be corrupted |
|
837
|
|
|
|
|
|
|
# *** if converted back into a spreadsheet |
|
838
|
|
|
|
|
|
|
# Option #1: Specify true to quote all cells on export, then post-process |
|
839
|
|
|
|
|
|
|
# the result to un-quote obviously safe cells (for yet more overhead). |
|
840
|
|
|
|
|
|
|
# Option #2: Specify false, and assume the resulting CSV will never |
|
841
|
|
|
|
|
|
|
# be imported into a spreadsheet except via us, and we pre-scan |
|
842
|
|
|
|
|
|
|
# the data to generate {col_formats} so will usually be safe. |
|
843
|
|
|
|
|
|
|
# 5/30/23: Switching to Option #2... |
|
844
|
|
|
|
|
|
|
.",false" |
|
845
|
|
|
|
|
|
|
# Token 8: on output: true to store number as numbers; false to |
|
846
|
|
|
|
|
|
|
# store number cells as text. No UI equivalent. |
|
847
|
|
|
|
|
|
|
.",true" # default: documented as true (for export) BUT IS NOT! |
|
848
|
|
|
|
|
|
|
# Token 9: "Save cell contents as shown" |
|
849
|
|
|
|
|
|
|
# Generally we DO NOT want this because things like dates |
|
850
|
|
|
|
|
|
|
# can be formatted many different ways. |
|
851
|
|
|
|
|
|
|
##.",".($opts->{raw_values} ? "false" : "true") |
|
852
|
|
|
|
|
|
|
.",false" |
|
853
|
|
|
|
|
|
|
# Token 10: "Export cell formulas" |
|
854
|
|
|
|
|
|
|
.",false" |
|
855
|
|
|
|
|
|
|
# Token 11: not used for export |
|
856
|
|
|
|
|
|
|
."," |
|
857
|
|
|
|
|
|
|
# Token 12: (LO 7.2+) sheet selections: |
|
858
|
|
|
|
|
|
|
# 0 or absent => the "first" sheet |
|
859
|
|
|
|
|
|
|
# 1-N => the Nth sheet (arrgh, can not specify name!!) |
|
860
|
|
|
|
|
|
|
# -1 => export all sheets to files named filebasenamne.Sheetname.csv |
|
861
|
|
|
|
|
|
|
.",".($opts->{allsheets} ? -1 : |
|
862
|
0
|
0
|
0
|
|
|
0
|
$opts->{sheetname} ? die("add named-sheet support here") : |
|
|
|
0
|
0
|
|
|
|
|
|
863
|
|
|
|
|
|
|
0) |
|
864
|
|
|
|
|
|
|
# Token 13: Not used for export |
|
865
|
|
|
|
|
|
|
."," |
|
866
|
|
|
|
|
|
|
# Token 14: true to include BOM in the result |
|
867
|
|
|
|
|
|
|
#."," |
|
868
|
|
|
|
|
|
|
} |
|
869
|
|
|
|
|
|
|
else { |
|
870
|
|
|
|
|
|
|
undef |
|
871
|
0
|
|
|
|
|
0
|
} |
|
872
|
|
|
|
|
|
|
}; |
|
873
|
|
|
|
|
|
|
|
|
874
|
|
|
|
|
|
|
# We can only control the output directory path, not the name of |
|
875
|
|
|
|
|
|
|
# an individual result file. If $dst is a directory then the result |
|
876
|
|
|
|
|
|
|
# could theoretically output into it directly, but instead we always |
|
877
|
|
|
|
|
|
|
# output to an ephemeral temp directory and then move the results to $dst |
|
878
|
|
|
|
|
|
|
# |
|
879
|
|
|
|
|
|
|
# With 'allsheets' the resulting files must be renamed to conform to our |
|
880
|
|
|
|
|
|
|
# external API (namely SHEETNAME.csv). |
|
881
|
|
|
|
|
|
|
# |
|
882
|
|
|
|
|
|
|
# ERROR DETECTION: As of LO 7.5 we always get zero exist status and the |
|
883
|
|
|
|
|
|
|
# only way to detect errors is to notice that no files were written. |
|
884
|
|
|
|
|
|
|
# https://bugs.documentfoundation.org/show_bug.cgi?id=155415 |
|
885
|
|
|
|
|
|
|
# |
|
886
|
0
|
|
|
|
|
0
|
my $tdir = Path::Tiny->tempdir(path($dst)->basename."_XXXXX"); |
|
887
|
|
|
|
|
|
|
# will be deleted when $dirpath goes out of scope |
|
888
|
|
|
|
|
|
|
|
|
889
|
|
|
|
|
|
|
my @cmd = ($prog, |
|
890
|
|
|
|
|
|
|
"--headless", "--invisible", |
|
891
|
|
|
|
|
|
|
"--nolockcheck", "--norestore", |
|
892
|
|
|
|
|
|
|
"--view", # open read-only in case can't create lockfile |
|
893
|
|
|
|
|
|
|
$ifilter ? ("--infilter=$ifilter") : (), |
|
894
|
|
|
|
|
|
|
"--convert-to", |
|
895
|
0
|
0
|
|
|
|
0
|
$opts->{cvt_to}.($ofilter ? ":$ofilter" : ""), |
|
|
|
0
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
"--outdir", $tdir->canonpath, |
|
897
|
|
|
|
|
|
|
path($src)->canonpath); |
|
898
|
|
|
|
|
|
|
|
|
899
|
0
|
0
|
|
|
|
0
|
unless ($debug) { |
|
900
|
0
|
|
|
|
|
0
|
$opts->{suppress_stdout} = 1; |
|
901
|
|
|
|
|
|
|
#$opts->{suppress_stderr} = 1; |
|
902
|
|
|
|
|
|
|
} |
|
903
|
|
|
|
|
|
|
|
|
904
|
0
|
|
|
|
|
0
|
my $cmdstatus = _runcmd($opts, @cmd); |
|
905
|
|
|
|
|
|
|
|
|
906
|
0
|
0
|
|
|
|
0
|
if ($cmdstatus != 0) { |
|
907
|
|
|
|
|
|
|
# This should never happen, see ERROR DETECTION above |
|
908
|
|
|
|
|
|
|
confess sprintf("($$) UNEXPECTED FAILURE, wstat=0x%04x\n",$cmdstatus), |
|
909
|
|
|
|
|
|
|
" converting '$opts->{inpath}' to $opts->{cvt_to}\n", |
|
910
|
0
|
|
|
|
|
0
|
" Command was: ",join(" ",map{qsh} @cmd); |
|
|
0
|
|
|
|
|
0
|
|
|
911
|
|
|
|
|
|
|
} |
|
912
|
|
|
|
|
|
|
|
|
913
|
0
|
|
|
|
|
0
|
my @result_files = path($tdir)->children; |
|
914
|
0
|
0
|
|
|
|
0
|
btw dvis '>> @result_files' if $debug; |
|
915
|
0
|
0
|
|
|
|
0
|
if (@result_files == 0) { |
|
916
|
0
|
0
|
|
|
|
0
|
croak qsh($src)." is missing or unreadable\n", "cmd: @cmd\n" |
|
917
|
|
|
|
|
|
|
unless -r $src; |
|
918
|
0
|
|
|
|
|
0
|
croak "Something went wrong, ",path($prog)->basename," produced no output\n", |
|
919
|
|
|
|
|
|
|
"cmd: @cmd\n" |
|
920
|
|
|
|
|
|
|
} |
|
921
|
|
|
|
|
|
|
|
|
922
|
0
|
0
|
|
|
|
0
|
if ($opts->{allsheets}) { |
|
923
|
|
|
|
|
|
|
# Rename files to match our API (omit the spreadsheetbasename- prefix) |
|
924
|
0
|
|
|
|
|
0
|
foreach (@result_files) { |
|
925
|
0
|
|
|
|
|
0
|
my $dir = $_->parent; # Like dirname but including Volume: |
|
926
|
0
|
|
|
|
|
0
|
my $base = $_->basename; |
|
927
|
0
|
0
|
|
|
|
0
|
(my $newbase = $base) =~ s/^\Q$opts->{ifbase}\E-// or oops dvis '$base $opts'; |
|
928
|
0
|
|
|
|
|
0
|
my $newpath = $dir->child($newbase)->canonpath; |
|
929
|
0
|
|
|
|
|
0
|
my $oldpath = path($_)->canonpath; |
|
930
|
0
|
0
|
|
|
|
0
|
btw ">> Renaming $oldpath -> $newbase\n" if $debug; |
|
931
|
0
|
0
|
|
|
|
0
|
rename ($oldpath, $newpath) or oops "$!"; |
|
932
|
0
|
|
|
|
|
0
|
$_ = $newpath; # update @result_files |
|
933
|
|
|
|
|
|
|
} |
|
934
|
|
|
|
|
|
|
} |
|
935
|
|
|
|
|
|
|
|
|
936
|
|
|
|
|
|
|
# Move the results to $dst |
|
937
|
0
|
0
|
|
|
|
0
|
if (-e $dst) { |
|
938
|
0
|
0
|
|
|
|
0
|
croak "$dst must be a directory if it pre-exists\n" unless -d $dst; |
|
939
|
0
|
0
|
|
|
|
0
|
btw ">> Moving results -> $dst\n" if $debug; |
|
940
|
0
|
|
|
|
|
0
|
foreach (@result_files) { |
|
941
|
0
|
0
|
|
|
|
0
|
btw ">>> move $_ -> $dst" if $debug; |
|
942
|
0
|
|
|
|
|
0
|
File::Copy::move($_, $dst) |
|
943
|
|
|
|
|
|
|
} |
|
944
|
0
|
0
|
|
|
|
0
|
btw ">> Now $dst contains: ",avis($dst->children) if $debug; |
|
945
|
|
|
|
|
|
|
} else { |
|
946
|
0
|
0
|
|
|
|
0
|
if ($opts->{allsheets}) { |
|
947
|
0
|
0
|
|
|
|
0
|
btw ">> dirmove $tdir -> $dst\n" if $debug; |
|
948
|
0
|
0
|
|
|
|
0
|
rename($tdir, $dst) or File::Copy::dirmove($tdir, $dst); |
|
949
|
|
|
|
|
|
|
} else { |
|
950
|
0
|
0
|
|
|
|
0
|
croak "Expecting only one result file, not @result_files" |
|
951
|
|
|
|
|
|
|
if @result_files > 1; |
|
952
|
0
|
0
|
|
|
|
0
|
btw ">> move $result_files[0] -> $dst\n" if $debug; |
|
953
|
0
|
|
|
|
|
0
|
File::Copy::move($result_files[0], $dst); |
|
954
|
|
|
|
|
|
|
} |
|
955
|
|
|
|
|
|
|
} |
|
956
|
|
|
|
|
|
|
}#_convert_using_openlibre |
|
957
|
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
sub _convert_using_ssconvert($$$) { |
|
959
|
0
|
|
|
0
|
|
0
|
my ($opts, $src, $dst) = @_; |
|
960
|
0
|
|
|
|
|
0
|
confess "Deprecated with extreme prejudice"; # no longer supported |
|
961
|
|
|
|
|
|
|
## |
|
962
|
|
|
|
|
|
|
## foreach (qw/inpath cvt_to /) |
|
963
|
|
|
|
|
|
|
## { oops "missing opts->{$_}" unless exists $opts->{$_} } |
|
964
|
|
|
|
|
|
|
## |
|
965
|
|
|
|
|
|
|
## my $eff_outpath = $opts->{outpath}; |
|
966
|
|
|
|
|
|
|
## if (my $prog=which("ssconvert")) { |
|
967
|
|
|
|
|
|
|
## my $enc = _get_encodings_from_opts($opts); |
|
968
|
|
|
|
|
|
|
## $enc //= "UTF-8"; # default |
|
969
|
|
|
|
|
|
|
## my @options; |
|
970
|
|
|
|
|
|
|
## if ($opts->{cvt_to} eq "csv") { |
|
971
|
|
|
|
|
|
|
## push @options, '--export-type=Gnumeric_stf:stf_assistant'; |
|
972
|
|
|
|
|
|
|
## my @dashO_terms = ("format=preserve", "transliterate-mode=escape"); |
|
973
|
|
|
|
|
|
|
## push @dashO_terms, "charset='${enc}'" if defined($enc); |
|
974
|
|
|
|
|
|
|
## if ($opts->{sheetname}) { |
|
975
|
|
|
|
|
|
|
## push @dashO_terms, "sheet='$opts->{sheetname}'"; |
|
976
|
|
|
|
|
|
|
## } |
|
977
|
|
|
|
|
|
|
## if ($opts->{allsheets}) { |
|
978
|
|
|
|
|
|
|
## #If both {allsheets} and {sheetname} are specified, only a single |
|
979
|
|
|
|
|
|
|
## # .csv file will be in the output directory |
|
980
|
|
|
|
|
|
|
## croak "'allsheets' option: 'outpath' must specify an existing directory" |
|
981
|
|
|
|
|
|
|
## unless -d $eff_outpath; |
|
982
|
|
|
|
|
|
|
## $eff_outpath = catfile($eff_outpath, "%s.csv"); |
|
983
|
|
|
|
|
|
|
## push @options, "--export-file-per-sheet"; |
|
984
|
|
|
|
|
|
|
## } |
|
985
|
|
|
|
|
|
|
## elsif ($opts->{sheetname}) { |
|
986
|
|
|
|
|
|
|
## # handled above |
|
987
|
|
|
|
|
|
|
## } |
|
988
|
|
|
|
|
|
|
## else { |
|
989
|
|
|
|
|
|
|
## # A backwards-incompatible change to ssconvert stopped extracting |
|
990
|
|
|
|
|
|
|
## # the "current" sheet by default; now all sheets are concatenated! |
|
991
|
|
|
|
|
|
|
## # See https://gitlab.gnome.org/GNOME/gnumeric/issues/461 |
|
992
|
|
|
|
|
|
|
## # ssconvert verison 1.12.45 supports a new "-O active-sheet=y" option |
|
993
|
|
|
|
|
|
|
## ## PORTABILITY BUG: Redirection syntax will not work on windows |
|
994
|
|
|
|
|
|
|
## my ($ssver) = (qx/ssconvert --version 2>&1/ =~ /ssconvert version '?(\d[\d\.]*)/); |
|
995
|
|
|
|
|
|
|
## if (version::is_lax($ssver) && version->parse($ssver) >= v1.12.45) { |
|
996
|
|
|
|
|
|
|
## push @dashO_terms, "active-sheet=y"; |
|
997
|
|
|
|
|
|
|
## } else { |
|
998
|
|
|
|
|
|
|
## croak("Due to an ssconvert bug, a sheetname must be given.\n", |
|
999
|
|
|
|
|
|
|
## "(for more information, see comment at ",__FILE__, |
|
1000
|
|
|
|
|
|
|
## " near line ", (__LINE__-10), ")\n"); |
|
1001
|
|
|
|
|
|
|
## } |
|
1002
|
|
|
|
|
|
|
## } |
|
1003
|
|
|
|
|
|
|
## push @options, '-O', join(" ",@dashO_terms); |
|
1004
|
|
|
|
|
|
|
## } |
|
1005
|
|
|
|
|
|
|
## elsif ($opts->{cvt_to} eq 'xlsx') { |
|
1006
|
|
|
|
|
|
|
## @options = ('--export-type=Gnumeric_Excel:xlsx2'); |
|
1007
|
|
|
|
|
|
|
## } |
|
1008
|
|
|
|
|
|
|
## elsif ($opts->{cvt_to} eq 'xls') { |
|
1009
|
|
|
|
|
|
|
## @options = ('--export-type=Gnumeric_Excel:excel_biff8'); # M'soft Excel 97/2000/XP |
|
1010
|
|
|
|
|
|
|
## } |
|
1011
|
|
|
|
|
|
|
## elsif ($opts->{cvt_to} =~ /^od/) { |
|
1012
|
|
|
|
|
|
|
## @options = ('--export-type=Gnumeric_OpenCalc:odf'); |
|
1013
|
|
|
|
|
|
|
## } |
|
1014
|
|
|
|
|
|
|
## elsif ($eff_outpath =~ /\.[a-z]{3,4}$/) { |
|
1015
|
|
|
|
|
|
|
## # let ssconvert choose based on the output file suffix |
|
1016
|
|
|
|
|
|
|
## } |
|
1017
|
|
|
|
|
|
|
## else { |
|
1018
|
|
|
|
|
|
|
## croak "unrecognized cvt_to='".u($opts->{cvt_to})."' and no outpath suffix"; |
|
1019
|
|
|
|
|
|
|
## } |
|
1020
|
|
|
|
|
|
|
## |
|
1021
|
|
|
|
|
|
|
## my $eff_inpath = $opts->{inpath}; |
|
1022
|
|
|
|
|
|
|
## if ($opts->{sheetname} && $opts->{inpath} =~ /.csv$/i) { |
|
1023
|
|
|
|
|
|
|
## # Control generated sheet name by using a symlink to the input file |
|
1024
|
|
|
|
|
|
|
## # See http://stackoverflow.com/questions/22550050/how-to-convert-csv-to-xls-with-ssconvert |
|
1025
|
|
|
|
|
|
|
## my $td = catdir($tempdir // oops, "Gnumeric"); |
|
1026
|
|
|
|
|
|
|
## remove_tree($td); mkdir($td) or die $!; |
|
1027
|
|
|
|
|
|
|
## $eff_inpath = catfile($td, $opts->{sheetname}); |
|
1028
|
|
|
|
|
|
|
## symlink $opts->{inpath}, $eff_inpath or die $!; |
|
1029
|
|
|
|
|
|
|
## fixme: handle unimplmented or no-perms symlink failures |
|
1030
|
|
|
|
|
|
|
## } |
|
1031
|
|
|
|
|
|
|
## my @cmd = ($prog, @options, $eff_inpath, $eff_outpath); |
|
1032
|
|
|
|
|
|
|
## |
|
1033
|
|
|
|
|
|
|
## my $suppress_stderr = !$opts->{debug}; |
|
1034
|
|
|
|
|
|
|
## if (0 != _runcmd({%$opts, suppress_stderr => $suppress_stderr}, @cmd)) { |
|
1035
|
|
|
|
|
|
|
## # Before showing a complicated ssconvert failure with backtrace, |
|
1036
|
|
|
|
|
|
|
## # check to see if the problem is just a non-existent input file |
|
1037
|
|
|
|
|
|
|
## { open my $dummy_fh, "<", $eff_inpath or croak "$eff_inpath : $!"; } |
|
1038
|
|
|
|
|
|
|
## my $failmsg = "($$) Conversion of '$opts->{inpath}' to $eff_outpath failed\n"."cmd: ".qshlist(@cmd)."\n"; |
|
1039
|
|
|
|
|
|
|
## if ($suppress_stderr) { # repeat showing all output |
|
1040
|
|
|
|
|
|
|
## if (0 == _runcmd({%$opts, suppress_stderr => 0}, @cmd)) { |
|
1041
|
|
|
|
|
|
|
## warn "Surprise! Command failed the first time but succeeded on 2nd try!\n"; |
|
1042
|
|
|
|
|
|
|
## } |
|
1043
|
|
|
|
|
|
|
## croak $failmsg; |
|
1044
|
|
|
|
|
|
|
## } |
|
1045
|
|
|
|
|
|
|
## } |
|
1046
|
|
|
|
|
|
|
## elsif (! -e $opts->{outpath}) { |
|
1047
|
|
|
|
|
|
|
## croak "($$) Conversion SILENTLY failed\n(using $prog)\n", |
|
1048
|
|
|
|
|
|
|
## " cmd: ",qshlist(@cmd),"\n" |
|
1049
|
|
|
|
|
|
|
## ; |
|
1050
|
|
|
|
|
|
|
## } |
|
1051
|
|
|
|
|
|
|
## return ($enc) |
|
1052
|
|
|
|
|
|
|
## } |
|
1053
|
|
|
|
|
|
|
## else { |
|
1054
|
|
|
|
|
|
|
## croak "Can not find ssconvert to convert '$opts->{inpath}' to $opts->{cvt_to}\n", |
|
1055
|
|
|
|
|
|
|
## "To install ssconvert: sudo apt-get install gnumeric\n"; |
|
1056
|
|
|
|
|
|
|
## } |
|
1057
|
|
|
|
|
|
|
} |
|
1058
|
|
|
|
|
|
|
|
|
1059
|
|
|
|
|
|
|
# Extracts |||SHEETNAME or !SHEETNAME or [SHEETNAME] from a path+sheet |
|
1060
|
|
|
|
|
|
|
# specification, if present. |
|
1061
|
|
|
|
|
|
|
# (Lots of historical compatibility issues...) |
|
1062
|
|
|
|
|
|
|
# In scalar context, returns SHEETNAME or undef. |
|
1063
|
|
|
|
|
|
|
# INTERNAL USE ONLY: In array context, returns (filepath, SHEETNAME or undef) |
|
1064
|
|
|
|
|
|
|
sub sheetname_from_spec($) { |
|
1065
|
6
|
|
|
6
|
1
|
26
|
my $spec = shift; |
|
1066
|
6
|
|
|
|
|
11
|
local $_; |
|
1067
|
6
|
|
|
|
|
18
|
my $p = path($spec); |
|
1068
|
6
|
|
|
|
|
350
|
my $parent = $p->parent; |
|
1069
|
6
|
|
|
|
|
407
|
my ($base,$sn) = ($p->basename =~ /^(.*) (?| \|\|\|([^\!\[\|]+)$ |
|
1070
|
|
|
|
|
|
|
| \!([^\!\[\|]+)$ |
|
1071
|
|
|
|
|
|
|
| \[([^\[\]]+)\]$ |
|
1072
|
|
|
|
|
|
|
)/x); |
|
1073
|
6
|
100
|
33
|
|
|
114
|
wantarray ? ($parent->child($base//$p->basename)->stringify, $sn) : $sn |
|
1074
|
|
|
|
|
|
|
} |
|
1075
|
|
|
|
|
|
|
sub filepath_from_spec($) { |
|
1076
|
2
|
|
|
2
|
1
|
6
|
my ($path, undef) = sheetname_from_spec($_[0]); |
|
1077
|
2
|
|
|
|
|
103
|
$path |
|
1078
|
|
|
|
|
|
|
} |
|
1079
|
|
|
|
|
|
|
#Tester |
|
1080
|
|
|
|
|
|
|
#foreach ("", "/a!b/c", "/a!b/c!sheet1", "/a/b/c[sheet2]", "/a/b/c[bozo]d.xls", |
|
1081
|
|
|
|
|
|
|
# ) { |
|
1082
|
|
|
|
|
|
|
# foreach($_, basename($_)) { |
|
1083
|
|
|
|
|
|
|
# my ($fp,$sn) = sheetname_from_spec($_); |
|
1084
|
|
|
|
|
|
|
# use open ':std', ':locale'; |
|
1085
|
|
|
|
|
|
|
# warn ivis '# $_ → $fp $sn\n'; |
|
1086
|
|
|
|
|
|
|
# my $sn2 = sheetname_from_spec($_); |
|
1087
|
|
|
|
|
|
|
# die "bug" unless u($sn) eq u($sn2); |
|
1088
|
|
|
|
|
|
|
# } |
|
1089
|
|
|
|
|
|
|
#} |
|
1090
|
|
|
|
|
|
|
#die "TEX"; |
|
1091
|
|
|
|
|
|
|
|
|
1092
|
|
|
|
|
|
|
# Construct a file + sheetname spec in the preferred form for humans to read |
|
1093
|
|
|
|
|
|
|
# If sheetname is undef, just return the file path |
|
1094
|
|
|
|
|
|
|
sub form_spec_with_sheetname($$) { |
|
1095
|
2
|
|
|
2
|
1
|
10
|
my ($filespec, $sheetname) = @_; |
|
1096
|
2
|
|
|
|
|
7
|
my $embedded_sheetname = sheetname_from_spec($filespec); |
|
1097
|
2
|
0
|
33
|
|
|
8
|
croak "conflicting embedded and separate sheetnames given" |
|
|
|
|
33
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
if $embedded_sheetname && $sheetname && $embedded_sheetname ne $sheetname; |
|
1099
|
2
|
|
33
|
|
|
14
|
$sheetname //= $embedded_sheetname; |
|
1100
|
2
|
|
|
|
|
6
|
my $filepath = filepath_from_spec($filespec); |
|
1101
|
2
|
50
|
|
|
|
19
|
$sheetname ? "${filepath}[${sheetname}]" : $filepath |
|
1102
|
|
|
|
|
|
|
#$sheetname ? "${filepath}|||${sheetname}" : $filepath |
|
1103
|
|
|
|
|
|
|
} |
|
1104
|
|
|
|
|
|
|
|
|
1105
|
|
|
|
|
|
|
our $default_input_encodings = "UTF-8,windows-1252,UTF-16BE,UTF-16LE"; |
|
1106
|
|
|
|
|
|
|
our $default_output_encoding = "UTF-8"; |
|
1107
|
|
|
|
|
|
|
|
|
1108
|
|
|
|
|
|
|
# Return digested %opts setting |
|
1109
|
|
|
|
|
|
|
# sheetname, inpath_sans_sheet (as Path::Tiny), encoding or default |
|
1110
|
|
|
|
|
|
|
sub _process_args($;@) { |
|
1111
|
2
|
50
|
|
2
|
|
5
|
confess "fix obsolete call to pass linearized options" |
|
1112
|
|
|
|
|
|
|
if ref($_[0]) eq "HASH"; |
|
1113
|
2
|
50
|
|
|
|
8
|
my $leading_inpath = ( scalar(@_) % 2 == 1 ? shift(@_) : undef ); |
|
1114
|
2
|
|
|
|
|
12
|
my %opts = ( |
|
1115
|
|
|
|
|
|
|
cvt_from => "", |
|
1116
|
|
|
|
|
|
|
cvt_to => "", |
|
1117
|
|
|
|
|
|
|
@_, |
|
1118
|
|
|
|
|
|
|
#verbose => 999, tempdir => "/tmp/J", |
|
1119
|
|
|
|
|
|
|
); |
|
1120
|
2
|
50
|
|
|
|
12
|
if (defined $opts{inpath}) { |
|
1121
|
0
|
0
|
|
|
|
0
|
croak "Initial INPATH arg specified as well as inpath => ... in options" |
|
1122
|
|
|
|
|
|
|
if defined $leading_inpath; |
|
1123
|
|
|
|
|
|
|
} else { |
|
1124
|
2
|
|
33
|
|
|
13
|
$opts{inpath} = $leading_inpath // croak "No inpath was specified"; |
|
1125
|
|
|
|
|
|
|
} |
|
1126
|
2
|
50
|
|
|
|
7
|
$opts{verbose}=1 if $opts{debug}; |
|
1127
|
|
|
|
|
|
|
|
|
1128
|
|
|
|
|
|
|
# inpath or outpath may have "!sheetname" appended (or alternate syntaxes), |
|
1129
|
|
|
|
|
|
|
# but may exist only if a separate 'sheetname' option is not specified. |
|
1130
|
|
|
|
|
|
|
# Input and output can not both be spreadsheets; one must be a CSV. |
|
1131
|
2
|
50
|
|
|
|
8
|
if (exists($opts{sheet})) { |
|
1132
|
0
|
|
|
|
|
0
|
carp "WARNING: Deprecated 'sheet' option found (use 'sheetname' instead)\n"; |
|
1133
|
0
|
0
|
|
|
|
0
|
croak "Both {sheet} and {sheetname} specified" if exists $opts{sheetname}; |
|
1134
|
0
|
|
|
|
|
0
|
$opts{sheetname} = delete $opts{sheet}; |
|
1135
|
|
|
|
|
|
|
} |
|
1136
|
2
|
|
|
|
|
3
|
{ my ($path_sans_sheet, $sheetname, $key); |
|
|
2
|
|
|
|
|
5
|
|
|
1137
|
2
|
|
|
|
|
7
|
for my $thiskey ('inpath', 'outpath') { |
|
1138
|
4
|
|
100
|
|
|
17
|
my $spec = $opts{$thiskey} || next; |
|
1139
|
2
|
|
|
|
|
8
|
my ($pssn, $sn) = sheetname_from_spec($spec); |
|
1140
|
2
|
50
|
|
|
|
133
|
if (defined $sn) { |
|
1141
|
0
|
0
|
|
|
|
0
|
croak "A sheetname is embeeded in both ", |
|
1142
|
|
|
|
|
|
|
"'$thiskey' ($opts{$thiskey}) and '$key' ($opts{$key})\n" |
|
1143
|
|
|
|
|
|
|
if $sheetname; |
|
1144
|
0
|
|
|
|
|
0
|
($path_sans_sheet, $sheetname, $key) = ($pssn, $sn, $thiskey); |
|
1145
|
|
|
|
|
|
|
} |
|
1146
|
|
|
|
|
|
|
} |
|
1147
|
2
|
50
|
|
|
|
10
|
if ($opts{sheetname}) { |
|
|
|
50
|
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
croak "'sheetname' option conflicts with embedded sheet name\n", |
|
1149
|
|
|
|
|
|
|
" sheetname => ", qsh($opts{sheetname}),"\n", |
|
1150
|
|
|
|
|
|
|
" $key => ", qsh($opts{$key}),"\n" |
|
1151
|
0
|
0
|
0
|
|
|
0
|
if defined($sheetname) && $sheetname ne $opts{sheetname}; |
|
1152
|
|
|
|
|
|
|
} |
|
1153
|
|
|
|
|
|
|
elsif (defined $sheetname) { |
|
1154
|
|
|
|
|
|
|
btw "(extracted sheet name \"$sheetname\" from $key)\n" |
|
1155
|
0
|
0
|
|
|
|
0
|
if $opts{verbose}; |
|
1156
|
0
|
|
|
|
|
0
|
$opts{sheetname} = $sheetname; |
|
1157
|
|
|
|
|
|
|
} |
|
1158
|
|
|
|
|
|
|
$opts{inpath_sans_sheet} = path( |
|
1159
|
|
|
|
|
|
|
($key && $key eq 'inpath') ? $path_sans_sheet : $opts{inpath} |
|
1160
|
2
|
50
|
33
|
|
|
11
|
); |
|
1161
|
|
|
|
|
|
|
} |
|
1162
|
|
|
|
|
|
|
# Input file basename sans any .suffix |
|
1163
|
2
|
|
|
|
|
110
|
$opts{ifbase} = $opts{inpath_sans_sheet}->basename(qr/\.[^.]+/); |
|
1164
|
|
|
|
|
|
|
|
|
1165
|
2
|
|
|
|
|
135
|
%opts |
|
1166
|
|
|
|
|
|
|
}#_process_args |
|
1167
|
|
|
|
|
|
|
|
|
1168
|
|
|
|
|
|
|
# Extract the of encoding(s) specified in an iolayers string |
|
1169
|
|
|
|
|
|
|
# Parse iolayers string, returning ($prefix,[encodings],$suffix) |
|
1170
|
|
|
|
|
|
|
# For example from ":raw:encodings(utf8,windows-1252):zz" the output |
|
1171
|
|
|
|
|
|
|
# would be (":raw", [:utf8","windows-1252"], ":zz") |
|
1172
|
|
|
|
|
|
|
sub _parse_iolayers($) { |
|
1173
|
0
|
|
0
|
0
|
|
0
|
local $_ = (shift) // ""; |
|
1174
|
0
|
0
|
|
|
|
0
|
/\A(<prefix>.*?) |
|
1175
|
|
|
|
|
|
|
(<encspec>:utf8|:encoding\(([^\)]+)\)) |
|
1176
|
|
|
|
|
|
|
(<suffix>.*?)\z/ or croak "Invalid iolayers spec '$_'\n"; |
|
1177
|
0
|
|
|
|
|
0
|
(my $prefix, $_, my $suffix) = ($+{prefix}, $+{encspec}, $+{suffix}); |
|
1178
|
0
|
0
|
0
|
|
|
0
|
/^:(utf8)$/ || /^:encoding\(([^\)]+)\)$/ or oops($_); |
|
1179
|
0
|
|
|
|
|
0
|
my $enclist = [split /,/, $1]; # comma,separated,list,of,encodings |
|
1180
|
0
|
|
|
|
|
0
|
($prefix, $enclist, $suffix); |
|
1181
|
|
|
|
|
|
|
} |
|
1182
|
|
|
|
|
|
|
|
|
1183
|
|
|
|
|
|
|
# Detect cvt_from and cvt_to from filenames, or peeking at the data. |
|
1184
|
|
|
|
|
|
|
# If input is CSV, detect encoding, separator and quote characters; |
|
1185
|
|
|
|
|
|
|
# add quotes to values with leading zeroes (e.g. Zip codes) which would |
|
1186
|
|
|
|
|
|
|
# otherwise be corrupted by being read as numbers instead of text strings. |
|
1187
|
|
|
|
|
|
|
# The modified data is written to a temp file |
|
1188
|
|
|
|
|
|
|
# Set default output_encoding if not specified |
|
1189
|
|
|
|
|
|
|
# RETURNS: The effective input path, either inpath_sans_sheet or a tempfile |
|
1190
|
|
|
|
|
|
|
sub _determine_enc_tofrom($) { |
|
1191
|
|
|
|
|
|
|
my $opts = shift; |
|
1192
|
|
|
|
|
|
|
my $debug = $opts->{debug}; |
|
1193
|
|
|
|
|
|
|
# Skip to ==BODY== below |
|
1194
|
|
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
|
my sub determine_input_encoding($) { |
|
1196
|
|
|
|
|
|
|
my $r2octets = shift; |
|
1197
|
|
|
|
|
|
|
# If user specified one encoding, use it; if user specified list, try them. |
|
1198
|
|
|
|
|
|
|
# If user did not specify, the default is a list to try. |
|
1199
|
|
|
|
|
|
|
$opts->{input_encoding} //= $default_input_encodings; |
|
1200
|
|
|
|
|
|
|
my @enclist = split m#,#, $opts->{input_encoding}; |
|
1201
|
|
|
|
|
|
|
return |
|
1202
|
|
|
|
|
|
|
if @enclist == 1; |
|
1203
|
|
|
|
|
|
|
$$r2octets //= $opts->{inpath_sans_sheet}->slurp_raw; |
|
1204
|
|
|
|
|
|
|
for my $enc (@enclist) { |
|
1205
|
|
|
|
|
|
|
eval { decode($enc, $$r2octets, Encode::FB_CROAK|Encode::LEAVE_SRC) }; |
|
1206
|
|
|
|
|
|
|
if ($@) { |
|
1207
|
|
|
|
|
|
|
btw "Input encoding '$enc' did not work...($@)\n" if $debug; |
|
1208
|
|
|
|
|
|
|
next; |
|
1209
|
|
|
|
|
|
|
} |
|
1210
|
|
|
|
|
|
|
btw "Input encoding '$enc' seems to work.\n" if $debug; |
|
1211
|
|
|
|
|
|
|
@enclist = ($enc); |
|
1212
|
|
|
|
|
|
|
last |
|
1213
|
|
|
|
|
|
|
} |
|
1214
|
|
|
|
|
|
|
#croak "Could not detect encoding of $opts->{inpath_sans_sheet}\n" |
|
1215
|
|
|
|
|
|
|
confess "Could not detect encoding of $opts->{inpath_sans_sheet}\n" |
|
1216
|
|
|
|
|
|
|
if @enclist > 1; |
|
1217
|
|
|
|
|
|
|
$opts->{input_encoding} = $enclist[0]; |
|
1218
|
|
|
|
|
|
|
} #determine_input_encoding |
|
1219
|
|
|
|
|
|
|
|
|
1220
|
|
|
|
|
|
|
my sub readparse_csv($@) { |
|
1221
|
|
|
|
|
|
|
my $fh = shift; |
|
1222
|
|
|
|
|
|
|
my %csvopts = ( |
|
1223
|
|
|
|
|
|
|
@sane_CSV_read_options, |
|
1224
|
|
|
|
|
|
|
defined($opts->{quote_char}) ? (quote_char=>$opts->{quote_char}) : (), |
|
1225
|
|
|
|
|
|
|
defined($opts->{sep_char}) ? (sep_char=>$opts->{sep_char}) : (), |
|
1226
|
|
|
|
|
|
|
auto_diag => 2, # throw on error |
|
1227
|
|
|
|
|
|
|
@_ |
|
1228
|
|
|
|
|
|
|
); |
|
1229
|
|
|
|
|
|
|
$csvopts{escape_char} = $csvopts{quote_char}; # must always be the same |
|
1230
|
|
|
|
|
|
|
|
|
1231
|
|
|
|
|
|
|
my $csv = Text::CSV->new (\%csvopts) |
|
1232
|
|
|
|
|
|
|
or croak "Text::CSV->new: ", Text::CSV->error_diag(), |
|
1233
|
|
|
|
|
|
|
dvis('\n## %csvopts\n'); |
|
1234
|
|
|
|
|
|
|
my @rows; |
|
1235
|
|
|
|
|
|
|
while (my $F = $csv->getline( $fh )) { |
|
1236
|
|
|
|
|
|
|
push(@rows, $F); |
|
1237
|
|
|
|
|
|
|
} |
|
1238
|
|
|
|
|
|
|
\@rows |
|
1239
|
|
|
|
|
|
|
} |
|
1240
|
|
|
|
|
|
|
|
|
1241
|
|
|
|
|
|
|
my sub open_input($) { |
|
1242
|
|
|
|
|
|
|
my $r2octets = shift; |
|
1243
|
|
|
|
|
|
|
oops unless $opts->{input_encoding}; |
|
1244
|
|
|
|
|
|
|
my $fh; |
|
1245
|
|
|
|
|
|
|
my $pathish = defined($$r2octets) |
|
1246
|
|
|
|
|
|
|
? \$$r2octets : $opts->{inpath_sans_sheet}; |
|
1247
|
1
|
|
|
1
|
|
7
|
open($fh, "<:encoding($opts->{input_encoding})", $pathish) |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
11
|
|
|
1248
|
|
|
|
|
|
|
or die "$pathish : $!"; |
|
1249
|
|
|
|
|
|
|
$fh |
|
1250
|
|
|
|
|
|
|
} |
|
1251
|
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
my sub determine_csv_q_sep($$) { |
|
1253
|
|
|
|
|
|
|
my ($r2octets, $r2rows) = @_; |
|
1254
|
|
|
|
|
|
|
return |
|
1255
|
|
|
|
|
|
|
if defined($opts->{quote_char}) && defined($opts->{sep_char}); |
|
1256
|
|
|
|
|
|
|
|
|
1257
|
|
|
|
|
|
|
my $fh = open_input($r2octets); |
|
1258
|
|
|
|
|
|
|
|
|
1259
|
|
|
|
|
|
|
# my $chars; |
|
1260
|
|
|
|
|
|
|
# if (defined($$r2octets) |
|
1261
|
|
|
|
|
|
|
# $chars = decode($opts->{input_encoding},$$r2octets,Encode::FB_CROAK); |
|
1262
|
|
|
|
|
|
|
# } |
|
1263
|
|
|
|
|
|
|
|
|
1264
|
|
|
|
|
|
|
# Try combinations starting with the most-common '"' and ',' while |
|
1265
|
|
|
|
|
|
|
# parsing the file for unsafe unquoted values (throws on syntax error). |
|
1266
|
|
|
|
|
|
|
# The expectation is that the first try usually succeeds |
|
1267
|
|
|
|
|
|
|
Q: |
|
1268
|
|
|
|
|
|
|
for my $q (defined($opts->{quote_char}) |
|
1269
|
|
|
|
|
|
|
? ($opts->{quote_char}) : ("\"", "'")) { |
|
1270
|
|
|
|
|
|
|
my $found_q; |
|
1271
|
|
|
|
|
|
|
SEP: |
|
1272
|
|
|
|
|
|
|
for my $sep (defined($opts->{sep_char}) |
|
1273
|
|
|
|
|
|
|
? ($opts->{sep_char}) : (",","\t")) { |
|
1274
|
|
|
|
|
|
|
btw dvis '--- TRYING $q $sep ---' if $debug; |
|
1275
|
|
|
|
|
|
|
# # Preliminary check for an illegal use of the quote char |
|
1276
|
|
|
|
|
|
|
# if (defined($chars) |
|
1277
|
|
|
|
|
|
|
# && $chars =~ /[^${q}${sep}\x{0D}\x{0A}] |
|
1278
|
|
|
|
|
|
|
# ${q} |
|
1279
|
|
|
|
|
|
|
# (?=[^${q}${sep}\x{0D}\x{0A}] | \z)/gx) { |
|
1280
|
|
|
|
|
|
|
# btw ivis '>>>quote_char CAN NOT BE $q with sep=$sep because q exists mid-field before pos ${\(pos($chars))}' |
|
1281
|
|
|
|
|
|
|
# if $debug; |
|
1282
|
|
|
|
|
|
|
# next SEP |
|
1283
|
|
|
|
|
|
|
# } |
|
1284
|
|
|
|
|
|
|
$$r2rows = eval{ readparse_csv($fh, quote_char=>$q, sep_char=>$sep) }; |
|
1285
|
|
|
|
|
|
|
if ($@ eq "") { |
|
1286
|
|
|
|
|
|
|
warn ivis '>> Detected quote_char=$q sep_char=$sep\n' if $debug; |
|
1287
|
|
|
|
|
|
|
$opts->{quote_char} = $q; |
|
1288
|
|
|
|
|
|
|
$opts->{sep_char} = $sep; |
|
1289
|
|
|
|
|
|
|
last Q; |
|
1290
|
|
|
|
|
|
|
} |
|
1291
|
|
|
|
|
|
|
warn vis '$@\nq=$q sep=$sep did not work...\n' if $debug; |
|
1292
|
|
|
|
|
|
|
seek $fh, 0, SEEK_SET; |
|
1293
|
|
|
|
|
|
|
} |
|
1294
|
|
|
|
|
|
|
} |
|
1295
|
|
|
|
|
|
|
unless (defined($$r2rows)) { |
|
1296
|
|
|
|
|
|
|
confess "Input file is not valid CSV (or we have a bug)\n" |
|
1297
|
|
|
|
|
|
|
} |
|
1298
|
|
|
|
|
|
|
}#determine_csv_q_sep |
|
1299
|
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
my sub determine_csv_col_formats($$) { |
|
1301
|
|
|
|
|
|
|
my ($r2octets, $r2rows) = @_; |
|
1302
|
|
|
|
|
|
|
return |
|
1303
|
|
|
|
|
|
|
if defined $opts->{col_formats}; |
|
1304
|
|
|
|
|
|
|
$$r2rows //= do{ |
|
1305
|
|
|
|
|
|
|
my $fh = open_input($r2octets); |
|
1306
|
|
|
|
|
|
|
readparse_csv($fh); |
|
1307
|
|
|
|
|
|
|
}; |
|
1308
|
|
|
|
|
|
|
my $max_cols = 0; for my $row (@{ $$r2rows }) { $max_cols = @$row if $max_cols < @$row } |
|
1309
|
|
|
|
|
|
|
state $curr_yy = (localtime(time))[5]; |
|
1310
|
|
|
|
|
|
|
my @col_formats; |
|
1311
|
|
|
|
|
|
|
my sub recognized($$$$;$) { |
|
1312
|
|
|
|
|
|
|
my ($cx, $rx, $thing, $format, $as_msg) = @_; |
|
1313
|
|
|
|
|
|
|
$col_formats[$cx] = $format; |
|
1314
|
|
|
|
|
|
|
return unless $debug; |
|
1315
|
|
|
|
|
|
|
$as_msg //= " as ".vis($col_formats[$cx])." format"; |
|
1316
|
|
|
|
|
|
|
if (length($thing) > 35) { $thing = substr($thing,0,32)."..."; } |
|
1317
|
|
|
|
|
|
|
@_ = ("Recognized ",$thing," in ", cxrx2sheetaddr($cx,$rx), $as_msg); |
|
1318
|
|
|
|
|
|
|
goto &btw |
|
1319
|
|
|
|
|
|
|
} |
|
1320
|
|
|
|
|
|
|
CX: |
|
1321
|
|
|
|
|
|
|
for my $cx (0..$max_cols-1) { |
|
1322
|
|
|
|
|
|
|
RX: |
|
1323
|
|
|
|
|
|
|
for my $rx (0..$#{$$r2rows}) { |
|
1324
|
|
|
|
|
|
|
my $row = $$r2rows->[$rx]; |
|
1325
|
|
|
|
|
|
|
next if $cx > $#$row; # row has fewer columns than others |
|
1326
|
|
|
|
|
|
|
for ($row->[$cx]) { |
|
1327
|
|
|
|
|
|
|
# recognize obvious Y/M/D or M/D/Y or D/M/Y date forms |
|
1328
|
|
|
|
|
|
|
if (m#\b(?<y>(?:[12]\d)?\d\d)/(?<m>\d\d)/(?<d>\d\d)\b#) { |
|
1329
|
|
|
|
|
|
|
if ($+{d} > 12 && $+{d} <= 31 && $+{m} >= 1 && $+{m} <= 12 |
|
1330
|
|
|
|
|
|
|
&& ($+{y} < 100 || $+{y} >= 1000)) { |
|
1331
|
|
|
|
|
|
|
recognized($cx,$rx,$_,"YY/MM/DD"); |
|
1332
|
|
|
|
|
|
|
next CX; |
|
1333
|
|
|
|
|
|
|
} |
|
1334
|
|
|
|
|
|
|
# If ambiguous YYYY/??/?? we can still assume it is a date and not text |
|
1335
|
|
|
|
|
|
|
if (length($+{y})==4) { |
|
1336
|
|
|
|
|
|
|
#recognized($cx,$rx,$_,""," as some kind of date, fmt unknown"); |
|
1337
|
|
|
|
|
|
|
next RX; |
|
1338
|
|
|
|
|
|
|
} |
|
1339
|
|
|
|
|
|
|
} |
|
1340
|
|
|
|
|
|
|
if (m#\b(?<m>\d\d)/(?<d>\d\d)/(?<y>(?:[12]\d)?\d\d)\b#) { |
|
1341
|
|
|
|
|
|
|
if ($+{y} < 100 || $+{y} >= 1000) { |
|
1342
|
|
|
|
|
|
|
if ($+{d} > 12 && $+{d} <= 31 && $+{m} >= 1 && $+{m} <= 12) { |
|
1343
|
|
|
|
|
|
|
recognized($cx,$rx,$_,"MM/DD/YY"); |
|
1344
|
|
|
|
|
|
|
next CX |
|
1345
|
|
|
|
|
|
|
} |
|
1346
|
|
|
|
|
|
|
elsif ($+{m} > 12 && $+{m} <= 31 && $+{d} >= 1 && $+{d} <= 12) { |
|
1347
|
|
|
|
|
|
|
recognized($cx,$rx,$_,"DD/MM/YY"); |
|
1348
|
|
|
|
|
|
|
next CX |
|
1349
|
|
|
|
|
|
|
} |
|
1350
|
|
|
|
|
|
|
} |
|
1351
|
|
|
|
|
|
|
# If ambiguous ??/??/YYYY we can still assume it is a date and not text |
|
1352
|
|
|
|
|
|
|
if (length($+{y})==4) { |
|
1353
|
|
|
|
|
|
|
#recognized($cx,$rx,$_,""," as some kind of date, fmt unknown"); |
|
1354
|
|
|
|
|
|
|
next RX; |
|
1355
|
|
|
|
|
|
|
} |
|
1356
|
|
|
|
|
|
|
} |
|
1357
|
|
|
|
|
|
|
# Things to force to be read as text fields: |
|
1358
|
|
|
|
|
|
|
# 1. Leading zeroes |
|
1359
|
|
|
|
|
|
|
# 2. Leading ascii minus (\x{2D}) rather than math minus \N{U+2212}. |
|
1360
|
|
|
|
|
|
|
# This prevents conversion to the Unicode math minus when LO |
|
1361
|
|
|
|
|
|
|
# reads the CSV. The assumption is that if the input has an ascii |
|
1362
|
|
|
|
|
|
|
# minus then the original spreadsheet format was "text" not |
|
1363
|
|
|
|
|
|
|
# numeric. |
|
1364
|
|
|
|
|
|
|
if (/^[\x{2D}0]/) { |
|
1365
|
|
|
|
|
|
|
recognized($cx,$rx,$_,"text"); |
|
1366
|
|
|
|
|
|
|
next CX; |
|
1367
|
|
|
|
|
|
|
} |
|
1368
|
|
|
|
|
|
|
} |
|
1369
|
|
|
|
|
|
|
} |
|
1370
|
|
|
|
|
|
|
} |
|
1371
|
|
|
|
|
|
|
$opts->{col_formats} = \@col_formats; |
|
1372
|
|
|
|
|
|
|
}#determine_csv_col_formats |
|
1373
|
|
|
|
|
|
|
|
|
1374
|
|
|
|
|
|
|
# ==BODY== |
|
1375
|
|
|
|
|
|
|
unless ($opts->{cvt_to}) { |
|
1376
|
|
|
|
|
|
|
if ($opts->{outpath} && $opts->{outpath} =~ /\.([^.]+)$/) { |
|
1377
|
|
|
|
|
|
|
$opts->{cvt_to} = $1; |
|
1378
|
|
|
|
|
|
|
} |
|
1379
|
|
|
|
|
|
|
croak "'cvt_to' was not specified and can not be intuited from 'outpath'" |
|
1380
|
|
|
|
|
|
|
,dvis('\n### $opts') ###TEMP |
|
1381
|
|
|
|
|
|
|
unless $opts->{cvt_to}; |
|
1382
|
|
|
|
|
|
|
} |
|
1383
|
|
|
|
|
|
|
unless ($opts->{cvt_from}) { |
|
1384
|
|
|
|
|
|
|
if ($opts->{inpath_sans_sheet} =~ /\.([^.]+)$/) { |
|
1385
|
|
|
|
|
|
|
$opts->{cvt_from} = $1; |
|
1386
|
|
|
|
|
|
|
} |
|
1387
|
|
|
|
|
|
|
} |
|
1388
|
|
|
|
|
|
|
$opts->{cvt_from} =~ s/^\.txt$/.csv/i if $opts->{cvt_from}; |
|
1389
|
|
|
|
|
|
|
|
|
1390
|
|
|
|
|
|
|
# Detect file format and, if CSV, encoding |
|
1391
|
|
|
|
|
|
|
my ($octets, $rows); |
|
1392
|
|
|
|
|
|
|
if (!$opts->{cvt_from} || $opts->{cvt_from} eq "csv") { |
|
1393
|
|
|
|
|
|
|
determine_input_encoding(\$octets); |
|
1394
|
|
|
|
|
|
|
} |
|
1395
|
|
|
|
|
|
|
if (!$opts->{cvt_from}) { |
|
1396
|
|
|
|
|
|
|
# Detect the file format by looking at the data. Actually, we only |
|
1397
|
|
|
|
|
|
|
# support CSV in this case, so this is just a (half-baked) sanity check. |
|
1398
|
|
|
|
|
|
|
eval { |
|
1399
|
|
|
|
|
|
|
determine_csv_q_sep(\$octets, \$rows); |
|
1400
|
|
|
|
|
|
|
if (!$opts->{cvt_from}) { |
|
1401
|
|
|
|
|
|
|
$rows //= do{ |
|
1402
|
|
|
|
|
|
|
my $fh = open_input(\$octets); |
|
1403
|
|
|
|
|
|
|
readparse_csv($fh); |
|
1404
|
|
|
|
|
|
|
}; |
|
1405
|
|
|
|
|
|
|
} |
|
1406
|
|
|
|
|
|
|
}; |
|
1407
|
|
|
|
|
|
|
if ($@ eq "") { |
|
1408
|
|
|
|
|
|
|
warn "> Detected $opts->{inpath_sans_sheet} as a seemingly-valid CSV\n" |
|
1409
|
|
|
|
|
|
|
if $debug; |
|
1410
|
|
|
|
|
|
|
$opts->{cvt_from} = "csv"; |
|
1411
|
|
|
|
|
|
|
} else { |
|
1412
|
|
|
|
|
|
|
croak "Can not detect what kind of file ",qsh($opts->{inpath})," is\n"; |
|
1413
|
|
|
|
|
|
|
} |
|
1414
|
|
|
|
|
|
|
} |
|
1415
|
|
|
|
|
|
|
|
|
1416
|
|
|
|
|
|
|
if ($opts->{cvt_from} eq "csv") { |
|
1417
|
|
|
|
|
|
|
determine_csv_col_formats(\$octets, \$rows); |
|
1418
|
|
|
|
|
|
|
} else { |
|
1419
|
|
|
|
|
|
|
oops if defined($octets) or defined($rows); |
|
1420
|
|
|
|
|
|
|
} |
|
1421
|
|
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
# Set default ouput_encoding if not specified |
|
1423
|
|
|
|
|
|
|
$opts->{output_encoding} //= $default_output_encoding |
|
1424
|
|
|
|
|
|
|
if $opts->{cvt_to} eq "csv"; |
|
1425
|
|
|
|
|
|
|
|
|
1426
|
|
|
|
|
|
|
}#_determine_enc_tofrom |
|
1427
|
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
sub _tool_extract_all_csvs($$) { |
|
1429
|
0
|
|
|
0
|
|
0
|
my ($opts, $destdir) = @_; |
|
1430
|
|
|
|
|
|
|
|
|
1431
|
0
|
|
|
|
|
0
|
_get_exclusive_lock($opts); |
|
1432
|
0
|
|
|
0
|
|
0
|
scope_guard { _release_lock($opts); }; |
|
|
0
|
|
|
|
|
0
|
|
|
1433
|
|
|
|
|
|
|
|
|
1434
|
0
|
|
|
|
|
0
|
delete local $opts->{sheetname}; |
|
1435
|
0
|
|
|
|
|
0
|
local $opts->{allsheets} = 1; |
|
1436
|
0
|
0
|
|
|
|
0
|
if (_openlibre_supports_allsheets()) { |
|
|
|
0
|
|
|
|
|
|
|
1437
|
0
|
|
|
|
|
0
|
_convert_using_openlibre($opts, $opts->{inpath_sans_sheet}, $destdir); |
|
1438
|
|
|
|
|
|
|
} |
|
1439
|
|
|
|
|
|
|
elsif (_ssconvert_supports_allsheets()) { |
|
1440
|
0
|
|
|
|
|
0
|
_convert_using_ssconvert($opts, $opts->{inpath_sans_sheet}, $destdir); |
|
1441
|
|
|
|
|
|
|
} |
|
1442
|
0
|
|
|
|
|
0
|
else { confess "Can't extract 'allsheets'. Please install LibreOffice 7.2 or newer" } |
|
1443
|
|
|
|
|
|
|
} |
|
1444
|
|
|
|
|
|
|
|
|
1445
|
|
|
|
|
|
|
sub _tool_can_extract_csv_byname() { |
|
1446
|
0
|
0
|
|
0
|
|
0
|
_openlibre_supports_named_sheet() || _ssconvert_supports_named_sheet() |
|
1447
|
|
|
|
|
|
|
} |
|
1448
|
|
|
|
|
|
|
sub _tool_extract_one_csv($$) { |
|
1449
|
0
|
|
|
0
|
|
0
|
my ($opts, $destpath) = @_; |
|
1450
|
|
|
|
|
|
|
|
|
1451
|
0
|
|
|
|
|
0
|
_get_exclusive_lock($opts); |
|
1452
|
0
|
|
|
0
|
|
0
|
scope_guard { _release_lock($opts); }; |
|
|
0
|
|
|
|
|
0
|
|
|
1453
|
|
|
|
|
|
|
|
|
1454
|
0
|
0
|
|
|
|
0
|
if (_openlibre_features()->{available}) { |
|
1455
|
0
|
0
|
0
|
|
|
0
|
oops if $opts->{sheetname} && !_openlibre_supports_named_sheet(); |
|
1456
|
0
|
|
|
|
|
0
|
_convert_using_openlibre($opts, $opts->{inpath_sans_sheet}, $destpath); |
|
1457
|
|
|
|
|
|
|
} else { |
|
1458
|
0
|
|
|
|
|
0
|
_convert_using_ssconvert($opts, $opts->{inpath_sans_sheet}, $destpath); |
|
1459
|
|
|
|
|
|
|
} |
|
1460
|
|
|
|
|
|
|
} |
|
1461
|
|
|
|
|
|
|
sub _tool_can_extract_current_sheet() { |
|
1462
|
|
|
|
|
|
|
_openlibre_features()->{available} || _ssconvert_features()->{available} |
|
1463
|
0
|
0
|
|
0
|
|
0
|
} |
|
1464
|
|
|
|
|
|
|
|
|
1465
|
|
|
|
|
|
|
sub _tool_write_spreadsheet($$) { |
|
1466
|
0
|
|
|
0
|
|
0
|
my ($opts, $destpath) = @_; |
|
1467
|
|
|
|
|
|
|
|
|
1468
|
0
|
|
|
|
|
0
|
_get_exclusive_lock($opts); |
|
1469
|
0
|
|
|
0
|
|
0
|
scope_guard { _release_lock($opts); }; |
|
|
0
|
|
|
|
|
0
|
|
|
1470
|
|
|
|
|
|
|
|
|
1471
|
|
|
|
|
|
|
# ssconvert allows specifying the sheetname when importing a csv but not LO |
|
1472
|
0
|
0
|
0
|
|
|
0
|
if ($opts->{sheetname} && _ssconvert_supports_writing($opts->{cvt_to})) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1473
|
0
|
|
|
|
|
0
|
_convert_using_ssconvert($opts, $opts->{inpath_sans_sheet}, $destpath); |
|
1474
|
|
|
|
|
|
|
} |
|
1475
|
|
|
|
|
|
|
elsif (_openlibre_supports_writing($opts->{cvt_to})) { |
|
1476
|
0
|
0
|
|
|
|
0
|
if ($opts->{sheetname}) { |
|
1477
|
0
|
|
|
|
|
0
|
carp "WARNING: Sheet name when creating a spreadsheet will be ignored\n"; |
|
1478
|
0
|
|
|
|
|
0
|
delete $opts->{sheetname}; |
|
1479
|
|
|
|
|
|
|
} |
|
1480
|
0
|
|
|
|
|
0
|
_convert_using_openlibre($opts, $opts->{inpath_sans_sheet}, $destpath); |
|
1481
|
|
|
|
|
|
|
} |
|
1482
|
|
|
|
|
|
|
elsif (_ssconvert_supports_writing($opts->{cvt_to})) { |
|
1483
|
0
|
|
|
|
|
0
|
_convert_using_ssconvert($opts, $opts->{inpath_sans_sheet}, $destpath); |
|
1484
|
|
|
|
|
|
|
} |
|
1485
|
0
|
|
|
|
|
0
|
else { croak "Can't create $opts->{cvt_to} spreadsheets. Please install LibreOffice 7.2 or newer" } |
|
1486
|
|
|
|
|
|
|
} |
|
1487
|
|
|
|
|
|
|
|
|
1488
|
|
|
|
|
|
|
|
|
1489
|
|
|
|
|
|
|
# Extract CSVs for every sheet into {outpath} (setting to tmpdir if not preset). |
|
1490
|
|
|
|
|
|
|
# If cached CSVs are available they are moved into {outpath}/ . |
|
1491
|
|
|
|
|
|
|
sub _extract_all_csvs($) { |
|
1492
|
0
|
|
|
0
|
|
0
|
my ($opts) = @_; |
|
1493
|
0
|
|
|
|
|
0
|
my $outpath = _final_outpath($opts); |
|
1494
|
0
|
|
|
|
|
0
|
$outpath->mkpath; # nop if exists, croaks if conflicts with file |
|
1495
|
|
|
|
|
|
|
|
|
1496
|
0
|
|
|
|
|
0
|
_tool_extract_all_csvs($opts, $outpath); #logs |
|
1497
|
|
|
|
|
|
|
} |
|
1498
|
|
|
|
|
|
|
|
|
1499
|
|
|
|
|
|
|
|
|
1500
|
|
|
|
|
|
|
# Extract a single sheet into a CSV at {outpath} (defaulting to temp file). |
|
1501
|
|
|
|
|
|
|
# If a cached CSV is available it is moved to {outpath}. |
|
1502
|
|
|
|
|
|
|
sub _extract_one_csv($) { |
|
1503
|
|
|
|
|
|
|
my ($opts) = @_; |
|
1504
|
|
|
|
|
|
|
my $cachedirpath = _cachedir($opts); |
|
1505
|
|
|
|
|
|
|
|
|
1506
|
|
|
|
|
|
|
my sub _fill_csv_cache() { |
|
1507
|
|
|
|
|
|
|
$cachedirpath->remove_tree; |
|
1508
|
|
|
|
|
|
|
$cachedirpath->mkpath; |
|
1509
|
|
|
|
|
|
|
{ #local $opts->{verbose} = 0; |
|
1510
|
|
|
|
|
|
|
#local $opts->{debug} = 0; |
|
1511
|
|
|
|
|
|
|
_tool_extract_all_csvs($opts, $cachedirpath); |
|
1512
|
|
|
|
|
|
|
} |
|
1513
|
|
|
|
|
|
|
} |
|
1514
|
|
|
|
|
|
|
|
|
1515
|
|
|
|
|
|
|
my $outpath = _final_outpath($opts); |
|
1516
|
|
|
|
|
|
|
$outpath->remove unless -d $outpath; |
|
1517
|
|
|
|
|
|
|
|
|
1518
|
|
|
|
|
|
|
if (defined($opts->{sheetname})) { |
|
1519
|
|
|
|
|
|
|
my $fname = $opts->{sheetname}.".csv"; |
|
1520
|
|
|
|
|
|
|
my $cached_path = $cachedirpath->child($fname); |
|
1521
|
|
|
|
|
|
|
if (! -e $cached_path) { |
|
1522
|
|
|
|
|
|
|
if (_tool_can_extract_csv_byname()) { |
|
1523
|
|
|
|
|
|
|
_tool_extract_one_csv($opts, $outpath); #logs |
|
1524
|
|
|
|
|
|
|
return |
|
1525
|
|
|
|
|
|
|
} |
|
1526
|
|
|
|
|
|
|
warn ">>Emulating extract-by-name by extracting all csvs into cache...\n" |
|
1527
|
|
|
|
|
|
|
if $opts->{debug}; |
|
1528
|
|
|
|
|
|
|
_fill_csv_cache; |
|
1529
|
|
|
|
|
|
|
} |
|
1530
|
|
|
|
|
|
|
croak "Sheet '$opts->{sheetname}' does not exist in $opts->{inpath_sans_sheet}\n" |
|
1531
|
|
|
|
|
|
|
unless -e $cached_path; |
|
1532
|
|
|
|
|
|
|
warn "> Moving cached $fname to $outpath\n" if $opts->{verbose}; |
|
1533
|
|
|
|
|
|
|
File::Copy::move($cached_path, $outpath); |
|
1534
|
|
|
|
|
|
|
return |
|
1535
|
|
|
|
|
|
|
} |
|
1536
|
|
|
|
|
|
|
elsif (_tool_can_extract_current_sheet()) { |
|
1537
|
|
|
|
|
|
|
_tool_extract_one_csv($opts, $outpath); #logs |
|
1538
|
|
|
|
|
|
|
return |
|
1539
|
|
|
|
|
|
|
} |
|
1540
|
|
|
|
|
|
|
else { |
|
1541
|
|
|
|
|
|
|
_fill_csv_cache; |
|
1542
|
|
|
|
|
|
|
my @children = $cachedirpath->children; |
|
1543
|
|
|
|
|
|
|
if (@children == 0) { |
|
1544
|
|
|
|
|
|
|
croak "$opts->{inpath_sans_sheet} appears to have zero sheets!\n" |
|
1545
|
|
|
|
|
|
|
} |
|
1546
|
|
|
|
|
|
|
elsif (@children == 1) { |
|
1547
|
|
|
|
|
|
|
my $fname = $children[0]->basename; |
|
1548
|
|
|
|
|
|
|
my $cached_path = $cachedirpath->child($fname); |
|
1549
|
|
|
|
|
|
|
warn "> Moving cached $fname to $outpath\n" if $opts->{verbose}; |
|
1550
|
|
|
|
|
|
|
File::Copy::move($cached_path, $outpath); |
|
1551
|
|
|
|
|
|
|
return |
|
1552
|
|
|
|
|
|
|
} |
|
1553
|
|
|
|
|
|
|
else { |
|
1554
|
|
|
|
|
|
|
croak "$opts->{inpath_sans_sheet} contains multiple sheets; you must specify a sheetname\n" |
|
1555
|
|
|
|
|
|
|
} |
|
1556
|
|
|
|
|
|
|
} |
|
1557
|
|
|
|
|
|
|
} |
|
1558
|
|
|
|
|
|
|
sub _write_spreadsheet($) { |
|
1559
|
0
|
|
|
0
|
|
0
|
my ($opts) = @_; |
|
1560
|
|
|
|
|
|
|
|
|
1561
|
0
|
|
|
|
|
0
|
my $outpath = _final_outpath($opts); |
|
1562
|
0
|
0
|
|
|
|
0
|
$outpath->remove unless -d $outpath; |
|
1563
|
|
|
|
|
|
|
|
|
1564
|
0
|
|
|
|
|
0
|
_tool_write_spreadsheet($opts, $outpath); |
|
1565
|
|
|
|
|
|
|
} |
|
1566
|
|
|
|
|
|
|
|
|
1567
|
|
|
|
|
|
|
# If {outpath} is not set, set it to a unique output path in $tempdir |
|
1568
|
|
|
|
|
|
|
# Always returns {outpath} as a Path::Tiny object. |
|
1569
|
|
|
|
|
|
|
sub _final_outpath($) { |
|
1570
|
0
|
|
|
0
|
|
0
|
my $opts = shift; |
|
1571
|
0
|
0
|
|
|
|
0
|
if (defined $opts->{outpath}) { |
|
1572
|
0
|
|
|
|
|
0
|
return path($opts->{outpath}); |
|
1573
|
|
|
|
|
|
|
} else { |
|
1574
|
0
|
0
|
|
|
|
0
|
my $suf = $opts->{cvt_to} unless $opts->{allsheets}; |
|
1575
|
|
|
|
|
|
|
return( |
|
1576
|
0
|
|
|
|
|
0
|
($opts->{outpath}=_path_under_tempdir($opts, suf=>$suf)) |
|
1577
|
|
|
|
|
|
|
); |
|
1578
|
|
|
|
|
|
|
} |
|
1579
|
|
|
|
|
|
|
} |
|
1580
|
|
|
|
|
|
|
|
|
1581
|
|
|
|
|
|
|
sub convert_spreadsheet(@) { |
|
1582
|
|
|
|
|
|
|
# Set inpath_sans_sheet, sheetname, ifbase, etc. |
|
1583
|
2
|
|
|
2
|
1
|
8
|
my %opts = &_process_args; |
|
1584
|
2
|
50
|
|
|
|
14
|
btw dvis('>>> convert_spreadsheet %opts\n') if $opts{debug}; |
|
1585
|
2
|
|
|
|
|
11
|
my %input_opts = %opts; |
|
1586
|
|
|
|
|
|
|
|
|
1587
|
2
|
|
|
|
|
13
|
_create_tempdir_if_needed(\%opts); |
|
1588
|
|
|
|
|
|
|
|
|
1589
|
|
|
|
|
|
|
# intuit cvt_from & cvt_to, detect encoding, and pre-process .csv input |
|
1590
|
|
|
|
|
|
|
# if needed to avoid corruption of leading zeroes. |
|
1591
|
2
|
|
|
|
|
63
|
_determine_enc_tofrom(\%opts); |
|
1592
|
|
|
|
|
|
|
|
|
1593
|
2
|
|
|
|
|
15
|
my $input_enc = $opts{input_encoding}; |
|
1594
|
2
|
|
|
|
|
7
|
my $output_enc = $opts{output_encoding}; |
|
1595
|
|
|
|
|
|
|
|
|
1596
|
|
|
|
|
|
|
croak "Either input or output must be 'csv'\n" |
|
1597
|
2
|
50
|
33
|
|
|
12
|
unless $opts{cvt_from} eq 'csv' || $opts{cvt_to} eq 'csv'; |
|
1598
|
2
|
50
|
|
|
|
7
|
if ($opts{allsheets}) { |
|
1599
|
|
|
|
|
|
|
croak "'allsheets' is allowed only with cvt_to => 'csv'" |
|
1600
|
0
|
0
|
0
|
|
|
0
|
unless ($opts{cvt_to}//"") eq "csv"; |
|
1601
|
|
|
|
|
|
|
croak "With 'allsheets', a sheet name may not be specified\n" |
|
1602
|
0
|
0
|
|
|
|
0
|
if $opts{sheetname}; |
|
1603
|
|
|
|
|
|
|
croak "With 'allsheets', 'outpath' must be a directory if it exists\n" |
|
1604
|
0
|
0
|
0
|
|
|
0
|
if $opts{outpath} && -e $opts{outpath} && ! -d _; |
|
|
|
|
0
|
|
|
|
|
|
1605
|
|
|
|
|
|
|
} |
|
1606
|
|
|
|
|
|
|
|
|
1607
|
2
|
|
|
|
|
4
|
my $done; |
|
1608
|
2
|
50
|
|
|
|
6
|
if ($opts{cvt_from} eq $opts{cvt_to}) { # csv to csv |
|
1609
|
2
|
50
|
|
|
|
5
|
if (!$opts{allsheets}) { |
|
1610
|
2
|
50
|
|
|
|
7
|
if ($input_enc ne $output_enc) { |
|
1611
|
|
|
|
|
|
|
# Special case #1: in & out are CSVs but different encodings. |
|
1612
|
|
|
|
|
|
|
warn "> Transcoding csv: $input_enc -> $output_enc\n" |
|
1613
|
0
|
0
|
|
|
|
0
|
if $opts{debug}; |
|
1614
|
0
|
|
|
|
|
0
|
my $octets = $opts{inpath_sans_sheet}->slurp_raw; |
|
1615
|
0
|
|
|
|
|
0
|
my $chars = decode($input_enc, $octets, Encode::FB_CROAK); |
|
1616
|
0
|
|
|
|
|
0
|
$octets = encode($output_enc, $chars, Encode::FB_CROAK); |
|
1617
|
0
|
|
|
|
|
0
|
path(_final_outpath(\%opts))->spew_raw($octets); |
|
1618
|
0
|
|
|
|
|
0
|
$done = 1; |
|
1619
|
|
|
|
|
|
|
} else { |
|
1620
|
|
|
|
|
|
|
# Special case #2: No conversion is needed: Just copy the file or |
|
1621
|
|
|
|
|
|
|
# return the input path itself as the output |
|
1622
|
2
|
50
|
|
|
|
7
|
if (defined $opts{outpath}) { |
|
1623
|
|
|
|
|
|
|
warn "> No conversion needed, copying into ",qsh($opts{outpath}),"\n" |
|
1624
|
0
|
0
|
|
|
|
0
|
if $opts{verbose}; |
|
1625
|
0
|
|
|
|
|
0
|
$opts{inpath_sans_sheet}->copy($opts{outpath}); |
|
1626
|
0
|
|
|
|
|
0
|
$done = 1; |
|
1627
|
|
|
|
|
|
|
} else { |
|
1628
|
2
|
|
|
|
|
5
|
$opts{outpath} = $opts{inpath_sans_sheet}; |
|
1629
|
|
|
|
|
|
|
warn "> No conversion needed, returning ", qsh($opts{outpath}),"\n" |
|
1630
|
2
|
50
|
|
|
|
5
|
if $opts{verbose}; |
|
1631
|
2
|
|
|
|
|
5
|
$done = 1; |
|
1632
|
|
|
|
|
|
|
} |
|
1633
|
|
|
|
|
|
|
} |
|
1634
|
|
|
|
|
|
|
} |
|
1635
|
|
|
|
|
|
|
else { |
|
1636
|
|
|
|
|
|
|
# Special case #2: <allsheets> with input already a csv: |
|
1637
|
|
|
|
|
|
|
# Leave a symlink to the input in the <outpath> directory. |
|
1638
|
0
|
0
|
|
|
|
0
|
croak "transcoding not implemented in this situation" |
|
1639
|
|
|
|
|
|
|
if ($input_enc ne $output_enc); |
|
1640
|
0
|
|
|
|
|
0
|
my $outpath = path(_final_outpath(\%opts)); |
|
1641
|
0
|
|
|
|
|
0
|
$outpath->mkpath; # nop if exists, croaks if conflicts with file |
|
1642
|
0
|
|
|
|
|
0
|
my $dest = $outpath->child( $opts{ifbase}.".csv" ); |
|
1643
|
0
|
|
|
|
|
0
|
my $inpath = $opts{inpath_sans_sheet}; |
|
1644
|
0
|
|
|
|
|
0
|
my $s = eval{ symlink($inpath, $dest) }; |
|
|
0
|
|
|
|
|
0
|
|
|
1645
|
0
|
0
|
0
|
|
|
0
|
if ($@ or !$s) { # symlink unimplmented or insufficient permissions |
|
1646
|
0
|
0
|
|
|
|
0
|
btw dvis '>> $@' if $opts{debug}; |
|
1647
|
|
|
|
|
|
|
warn "> No conversion needed! Copying into ", qsh($dest),"\n" |
|
1648
|
0
|
0
|
|
|
|
0
|
if $opts{verbose}; |
|
1649
|
0
|
|
|
|
|
0
|
$opts{inpath_sans_sheet}->copy($dest); |
|
1650
|
|
|
|
|
|
|
} else { |
|
1651
|
|
|
|
|
|
|
warn "> No conversion needed! Left symlink at ", qsh($dest),"\n" |
|
1652
|
0
|
0
|
|
|
|
0
|
if $opts{verbose}; |
|
1653
|
|
|
|
|
|
|
} |
|
1654
|
0
|
|
|
|
|
0
|
$done = 1; |
|
1655
|
|
|
|
|
|
|
} |
|
1656
|
|
|
|
|
|
|
} |
|
1657
|
2
|
50
|
|
|
|
7
|
if (! $done) { |
|
1658
|
0
|
0
|
|
|
|
0
|
if ($opts{allsheets}) { |
|
1659
|
0
|
|
|
|
|
0
|
_extract_all_csvs(\%opts); |
|
1660
|
|
|
|
|
|
|
} |
|
1661
|
|
|
|
|
|
|
else { |
|
1662
|
|
|
|
|
|
|
# Result will be a single file. |
|
1663
|
0
|
0
|
|
|
|
0
|
if ($opts{cvt_to} eq "csv") { |
|
1664
|
0
|
|
|
|
|
0
|
_extract_one_csv(\%opts); |
|
1665
|
|
|
|
|
|
|
} else { |
|
1666
|
0
|
|
|
|
|
0
|
_write_spreadsheet(\%opts); |
|
1667
|
|
|
|
|
|
|
} |
|
1668
|
|
|
|
|
|
|
} |
|
1669
|
|
|
|
|
|
|
} |
|
1670
|
|
|
|
|
|
|
my $result = { |
|
1671
|
|
|
|
|
|
|
defined($output_enc) ? (encoding => $output_enc):(), |
|
1672
|
8
|
|
|
|
|
30
|
(map{ my $v = $opts{$_}; |
|
1673
|
8
|
100
|
|
|
|
35
|
($_ => (blessed($v) ? $v->stringify : $v)) |
|
1674
|
2
|
50
|
|
|
|
7
|
} grep{ defined $opts{$_} } |
|
|
10
|
|
|
|
|
23
|
|
|
1675
|
|
|
|
|
|
|
qw/inpath_sans_sheet outpath cvt_from cvt_to sheetname/) |
|
1676
|
|
|
|
|
|
|
}; |
|
1677
|
|
|
|
|
|
|
log_call [\%input_opts], [$result, \_fmt_outpath_contents($result)] |
|
1678
|
2
|
50
|
|
|
|
8
|
if $opts{verbose}; |
|
1679
|
|
|
|
|
|
|
|
|
1680
|
2
|
|
|
|
|
9
|
$result; |
|
1681
|
|
|
|
|
|
|
}#convert_spreadsheet |
|
1682
|
|
|
|
|
|
|
|
|
1683
|
|
|
|
|
|
|
# Open as a CSV, intuiting input encoding, converting spreadsheet if necessary. |
|
1684
|
|
|
|
|
|
|
# |
|
1685
|
|
|
|
|
|
|
# :crlf translation is enabled on the resulting file handle, which converts |
|
1686
|
|
|
|
|
|
|
# DOS CR,LF to \n while passing *nix bare LF through unmolested. |
|
1687
|
|
|
|
|
|
|
# |
|
1688
|
|
|
|
|
|
|
# Input argument(s) are the same as for convert_spreadsheet (except |
|
1689
|
|
|
|
|
|
|
# outpath may not be specified). |
|
1690
|
|
|
|
|
|
|
# |
|
1691
|
|
|
|
|
|
|
# Returns a hash containing the file handle and other information. |
|
1692
|
|
|
|
|
|
|
sub OpenAsCsv { |
|
1693
|
2
|
50
|
|
2
|
1
|
31
|
my %opts = ( |
|
1694
|
|
|
|
|
|
|
(@_ == 1 ? (inpath => $_[0]) : (@_)), |
|
1695
|
|
|
|
|
|
|
cvt_to => 'csv', |
|
1696
|
|
|
|
|
|
|
); |
|
1697
|
|
|
|
|
|
|
# TODO: Rename {path} to {inpath} in all usages and rm this cruft; |
|
1698
|
|
|
|
|
|
|
carp "Obsolete OpenAsCsv usage: Change path to inpath\n" |
|
1699
|
2
|
50
|
33
|
|
|
12
|
if exists($opts{path}) and !$opts{silent}; |
|
1700
|
2
|
|
33
|
|
|
21
|
$opts{inpath} //= delete $opts{path}; # be compatible with old API |
|
1701
|
|
|
|
|
|
|
|
|
1702
|
2
|
|
|
|
|
8
|
my $inpath = delete $opts{inpath}; |
|
1703
|
2
|
50
|
|
|
|
19
|
croak "OpenAsCsv: missing 'inpath' option\n" unless $inpath; |
|
1704
|
2
|
50
|
|
|
|
7
|
croak "OpenAsCsv: outpath may not be specified\n" if $opts{outpath}; |
|
1705
|
|
|
|
|
|
|
|
|
1706
|
2
|
|
|
|
|
11
|
my $h = convert_spreadsheet($inpath, %opts, verbose => $opts{debug}); |
|
1707
|
2
|
50
|
|
|
|
9
|
oops "sheetname key bug" if exists $h->{sheet}; |
|
1708
|
|
|
|
|
|
|
|
|
1709
|
2
|
|
33
|
|
|
8
|
my $csvpath = $h->{outpath} // oops; # same as {inpath} if already a CSV |
|
1710
|
2
|
50
|
|
|
|
87
|
open my $fh, "<", $csvpath or croak "$csvpath : $!\n"; |
|
1711
|
2
|
50
|
|
|
|
45
|
binmode $fh, ":crlf:encoding(".$h->{encoding}.")" or die "binmode:$!"; |
|
1712
|
|
|
|
|
|
|
|
|
1713
|
|
|
|
|
|
|
my $r = { |
|
1714
|
|
|
|
|
|
|
fh => $fh, |
|
1715
|
|
|
|
|
|
|
csvpath => $csvpath, |
|
1716
|
|
|
|
|
|
|
inpath => $inpath, |
|
1717
|
2
|
100
|
|
|
|
120
|
(map{ exists($h->{$_}) ? ($_ => $h->{$_}) : () } |
|
|
10
|
|
|
|
|
39
|
|
|
1718
|
|
|
|
|
|
|
qw/inpath_sans_sheet sheetname encoding tempdir raw_values/), |
|
1719
|
|
|
|
|
|
|
}; |
|
1720
|
|
|
|
|
|
|
|
|
1721
|
2
|
|
|
|
|
15
|
return $r; |
|
1722
|
|
|
|
|
|
|
} |
|
1723
|
|
|
|
|
|
|
|
|
1724
|
|
|
|
|
|
|
1; |
|
1725
|
|
|
|
|
|
|
__END__ |
|
1726
|
|
|
|
|
|
|
|
|
1727
|
|
|
|
|
|
|
=pod |
|
1728
|
|
|
|
|
|
|
|
|
1729
|
|
|
|
|
|
|
=head1 NAME |
|
1730
|
|
|
|
|
|
|
|
|
1731
|
|
|
|
|
|
|
Spreadsheet::Edit::IO - convert between spreadsheet and csv files |
|
1732
|
|
|
|
|
|
|
|
|
1733
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
1734
|
|
|
|
|
|
|
|
|
1735
|
|
|
|
|
|
|
use Spreadsheet::Edit::IO qw/ |
|
1736
|
|
|
|
|
|
|
convert_spreadsheet OpenAsCsv |
|
1737
|
|
|
|
|
|
|
cx2let let2cx |
|
1738
|
|
|
|
|
|
|
@sane_CSV_read_options @sane_CSV_write_options/; |
|
1739
|
|
|
|
|
|
|
|
|
1740
|
|
|
|
|
|
|
# Open a CSV file or result of converting a sheet from a spreadsheet |
|
1741
|
|
|
|
|
|
|
my $hash = OpenAsCsv("/path/to/spreadsheet.odt!Sheet1"); # single-arg form |
|
1742
|
|
|
|
|
|
|
my $hash = OpenAsCsv(inpath => "/path/to/spreadsheet.odt", |
|
1743
|
|
|
|
|
|
|
sheetname -> "Sheet1"); |
|
1744
|
|
|
|
|
|
|
|
|
1745
|
|
|
|
|
|
|
print "Reading ",$hash->{csvpath}," with encoding ",$hash->{encoding},"\n"; |
|
1746
|
|
|
|
|
|
|
while (<$hash->{fh}>) { ... } |
|
1747
|
|
|
|
|
|
|
|
|
1748
|
|
|
|
|
|
|
# Convert CSV to spreadsheet in temp file (deleted at process exit) |
|
1749
|
|
|
|
|
|
|
$hash = convert_spreadsheet(inpath => "mycsv.csv", cvt_to => "xlsx"); |
|
1750
|
|
|
|
|
|
|
print "Output is $hash->{outpath}\n"; # e.g. "/tmp/dwYT6qf/mycsv.xlsx" |
|
1751
|
|
|
|
|
|
|
|
|
1752
|
|
|
|
|
|
|
# Convert *all* sheets to CSV files in a temp directory |
|
1753
|
|
|
|
|
|
|
$hash = convert_spreadsheet(inpath => "mywork.xls", cvt_to => "csv", |
|
1754
|
|
|
|
|
|
|
allsheets => 1); |
|
1755
|
|
|
|
|
|
|
opendir $dh, $hash->{outpath}; |
|
1756
|
|
|
|
|
|
|
while (readrir($h)) { say "Output csv file is $_" } |
|
1757
|
|
|
|
|
|
|
|
|
1758
|
|
|
|
|
|
|
# Transcode a CSV from windows-1252 to UTF-8 |
|
1759
|
|
|
|
|
|
|
convert_spreadsheet( |
|
1760
|
|
|
|
|
|
|
inpath => "input.csv", input_encoding => 'windows-1252', |
|
1761
|
|
|
|
|
|
|
outpath => "output.csv", output_encodoutg => 'UTF-8', |
|
1762
|
|
|
|
|
|
|
); |
|
1763
|
|
|
|
|
|
|
|
|
1764
|
|
|
|
|
|
|
# Translate between 0-based column index and letter code (A, B, etc.) |
|
1765
|
|
|
|
|
|
|
print cx2let(0); # "A" |
|
1766
|
|
|
|
|
|
|
print let2cx("A"); # 0 |
|
1767
|
|
|
|
|
|
|
print cx2let(26); # "AA" |
|
1768
|
|
|
|
|
|
|
print let2cx("ABC"); # 730 |
|
1769
|
|
|
|
|
|
|
|
|
1770
|
|
|
|
|
|
|
# Extract components from "filepath!SHEETNAME" specifiers |
|
1771
|
|
|
|
|
|
|
my $path = filepath_from_spec("/path/to/spreasheet.xls!Sheet1") |
|
1772
|
|
|
|
|
|
|
my $sheetname = sheetname_from_spec("/path/to/spreasheet.xls!Sheet1") |
|
1773
|
|
|
|
|
|
|
|
|
1774
|
|
|
|
|
|
|
# Parse a csv file with sane options |
|
1775
|
|
|
|
|
|
|
my $csv = Text::CSV->new({ @sane_CSV_read_options, eol => $hash->{eol} }) |
|
1776
|
|
|
|
|
|
|
or die "ERROR: ".Text::CSV->error_diag (); |
|
1777
|
|
|
|
|
|
|
my @rows |
|
1778
|
|
|
|
|
|
|
while (my $F = $csv->getline( $infh )) { |
|
1779
|
|
|
|
|
|
|
push @rows, $F; |
|
1780
|
|
|
|
|
|
|
} |
|
1781
|
|
|
|
|
|
|
close $infh or die "Error reading ", $hash->csvpath(), ": $!"; |
|
1782
|
|
|
|
|
|
|
|
|
1783
|
|
|
|
|
|
|
# Write a csv file with sane options |
|
1784
|
|
|
|
|
|
|
my $ocsv = Text::CSV->new({ @sane_CSV_write_options }) |
|
1785
|
|
|
|
|
|
|
or die "ERROR: ".Text::CSV->error_diag (); |
|
1786
|
|
|
|
|
|
|
open my $outfh, ">:encoding(utf8)", $outpath |
|
1787
|
|
|
|
|
|
|
or die "$outpath: $!"; |
|
1788
|
|
|
|
|
|
|
foreach (@rows) { $ocsv->print($outfh, $_) } |
|
1789
|
|
|
|
|
|
|
close $outfh or die "Error writing $outpath: $!"; |
|
1790
|
|
|
|
|
|
|
|
|
1791
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
1792
|
|
|
|
|
|
|
|
|
1793
|
|
|
|
|
|
|
Convert between CSV and spreadsheet files using external tools, plus some utility functions |
|
1794
|
|
|
|
|
|
|
|
|
1795
|
|
|
|
|
|
|
Currently this uses LibreOffice or OpenOffice (whatever is installed). An external tool is not needed when only CSV files are involved. |
|
1796
|
|
|
|
|
|
|
|
|
1797
|
|
|
|
|
|
|
=head2 $hash = OpenAsCsv INPUT |
|
1798
|
|
|
|
|
|
|
|
|
1799
|
|
|
|
|
|
|
=head2 $hash = OpenAsCsv inpath => INPUT, sheetname => SHEETNAME, ... |
|
1800
|
|
|
|
|
|
|
|
|
1801
|
|
|
|
|
|
|
This is a thin wrapper for C<convert_spreadsheet> followed by C<open> |
|
1802
|
|
|
|
|
|
|
|
|
1803
|
|
|
|
|
|
|
If a single argument is given it specifies INPUT; otherwise all arguments must |
|
1804
|
|
|
|
|
|
|
be specified as key => value pairs, and may include any options supported |
|
1805
|
|
|
|
|
|
|
by C<convert_spreadsheet>. |
|
1806
|
|
|
|
|
|
|
|
|
1807
|
|
|
|
|
|
|
INPUT may be a csv or spreadsheet workbook path; if a spreadsheet, |
|
1808
|
|
|
|
|
|
|
then a single "sheet" is converted, specified by either a !SHEETNAME suffix |
|
1809
|
|
|
|
|
|
|
in the INPUT path, a separate C<< sheetname => SHEETNAME >> option, |
|
1810
|
|
|
|
|
|
|
or unspecified to extract the only sheet (croaks if there is more than one). |
|
1811
|
|
|
|
|
|
|
|
|
1812
|
|
|
|
|
|
|
The resulting file handle refers to a guaranteed-seekable CSV file; |
|
1813
|
|
|
|
|
|
|
this will either be a temporary file (auto-removed at process exit), |
|
1814
|
|
|
|
|
|
|
or the original INPUT if it was already a seekable csv file. |
|
1815
|
|
|
|
|
|
|
|
|
1816
|
|
|
|
|
|
|
RETURNS: A ref to a hash containing the following: |
|
1817
|
|
|
|
|
|
|
|
|
1818
|
|
|
|
|
|
|
{ |
|
1819
|
|
|
|
|
|
|
fh => the resulting open file handle |
|
1820
|
|
|
|
|
|
|
csvpath => the path {fh} refers to, which might be a temporary file |
|
1821
|
|
|
|
|
|
|
sheetname => sheet name if the input was a spreadsheet |
|
1822
|
|
|
|
|
|
|
} |
|
1823
|
|
|
|
|
|
|
|
|
1824
|
|
|
|
|
|
|
=head2 convert_spreadsheet INPUT, cvt_to=>suffix, OPTIONS |
|
1825
|
|
|
|
|
|
|
|
|
1826
|
|
|
|
|
|
|
=head2 convert_spreadsheet INPUT, cvt_to=>"csv", allsheets => 1, OPTIONS |
|
1827
|
|
|
|
|
|
|
|
|
1828
|
|
|
|
|
|
|
Convert CSV to spreadsheet or vice-versa, or transcode CSV to CSV. |
|
1829
|
|
|
|
|
|
|
|
|
1830
|
|
|
|
|
|
|
RETURNS: A ref to a hash containing: |
|
1831
|
|
|
|
|
|
|
|
|
1832
|
|
|
|
|
|
|
{ |
|
1833
|
|
|
|
|
|
|
outpath => path to the output file (or directory with 'allsheets') |
|
1834
|
|
|
|
|
|
|
(a temporary file/dir or as you specified in OPTIONS). |
|
1835
|
|
|
|
|
|
|
|
|
1836
|
|
|
|
|
|
|
encoding => the encoding used when writing .csv files |
|
1837
|
|
|
|
|
|
|
} |
|
1838
|
|
|
|
|
|
|
|
|
1839
|
|
|
|
|
|
|
INPUT is the input file path; it may be a separate first argument as |
|
1840
|
|
|
|
|
|
|
shown above, or else included in OPTIONS as C<< inpath =E<gt> INPUT >>. |
|
1841
|
|
|
|
|
|
|
|
|
1842
|
|
|
|
|
|
|
If C<outpath =E<gt> OUTPATH> is specifed then results are I<always> saved |
|
1843
|
|
|
|
|
|
|
to that path. With C<allsheets> this is a directory, which will be created |
|
1844
|
|
|
|
|
|
|
if necessary. |
|
1845
|
|
|
|
|
|
|
|
|
1846
|
|
|
|
|
|
|
If C<outpath> is NOT specified in OPTIONS then, with one exception, |
|
1847
|
|
|
|
|
|
|
results are saved to a temporary file or directory and that path is returned |
|
1848
|
|
|
|
|
|
|
as C<outpath> in the result hash. |
|
1849
|
|
|
|
|
|
|
The exception is if no conversion is necessary |
|
1850
|
|
|
|
|
|
|
(i.e. C<cvt_from> is the same as C<cvt_to>), when the |
|
1851
|
|
|
|
|
|
|
input file itself is returned as C<outpath>. |
|
1852
|
|
|
|
|
|
|
|
|
1853
|
|
|
|
|
|
|
In all cases C<outpath> in the result hash points to the results. |
|
1854
|
|
|
|
|
|
|
|
|
1855
|
|
|
|
|
|
|
C<cvt_to> or C<cvt_from> are filename suffixes (sans dot) |
|
1856
|
|
|
|
|
|
|
e.g. "csv", "xlsx", etc., and need not be specified when indicated by |
|
1857
|
|
|
|
|
|
|
C<outpath> or INPUT parameters. |
|
1858
|
|
|
|
|
|
|
|
|
1859
|
|
|
|
|
|
|
OPTIONS may also include: |
|
1860
|
|
|
|
|
|
|
|
|
1861
|
|
|
|
|
|
|
=over 4 |
|
1862
|
|
|
|
|
|
|
|
|
1863
|
|
|
|
|
|
|
=item sheetname => "sheet name" |
|
1864
|
|
|
|
|
|
|
|
|
1865
|
|
|
|
|
|
|
The workbook 'sheet' name used when reading or writing a spreadsheet. |
|
1866
|
|
|
|
|
|
|
An input sheet name may also be specified as "!sheetname" appended to |
|
1867
|
|
|
|
|
|
|
the INPUT path. |
|
1868
|
|
|
|
|
|
|
|
|
1869
|
|
|
|
|
|
|
=item allsheets => BOOL |
|
1870
|
|
|
|
|
|
|
|
|
1871
|
|
|
|
|
|
|
B<All> sheets in the input |
|
1872
|
|
|
|
|
|
|
are converted to separate .csv files named "SHEETNAME.csv" in |
|
1873
|
|
|
|
|
|
|
the 'outpath' directory. C<< cvt_to =E<gt> 'csv' >> is also requred. |
|
1874
|
|
|
|
|
|
|
|
|
1875
|
|
|
|
|
|
|
=item input_encoding => ENCODING |
|
1876
|
|
|
|
|
|
|
|
|
1877
|
|
|
|
|
|
|
Specifies the encoding of INPUT if it is a csv file. |
|
1878
|
|
|
|
|
|
|
|
|
1879
|
|
|
|
|
|
|
ENCODING may be a comma-separated list of encoding |
|
1880
|
|
|
|
|
|
|
names which will be tried in the order until one seems to work. |
|
1881
|
|
|
|
|
|
|
If only one is specified it will be used without trying it first. |
|
1882
|
|
|
|
|
|
|
The default is "UTF-8,windows-1252". |
|
1883
|
|
|
|
|
|
|
|
|
1884
|
|
|
|
|
|
|
=item output_encoding => ENCODING |
|
1885
|
|
|
|
|
|
|
|
|
1886
|
|
|
|
|
|
|
Used when writing csv file(s), defaults to 'UTF-8'. |
|
1887
|
|
|
|
|
|
|
|
|
1888
|
|
|
|
|
|
|
=item col_formats => [...] |
|
1889
|
|
|
|
|
|
|
|
|
1890
|
|
|
|
|
|
|
This specifies how CSV data is imported into a spreadsheet. Each element |
|
1891
|
|
|
|
|
|
|
of the array may contain: |
|
1892
|
|
|
|
|
|
|
|
|
1893
|
|
|
|
|
|
|
undef, "standard" or "" (LibreOffice will auto-detect) |
|
1894
|
|
|
|
|
|
|
"text" (imported as unmolested text) |
|
1895
|
|
|
|
|
|
|
"MM/DD/YY", |
|
1896
|
|
|
|
|
|
|
"DD/MM/YY", |
|
1897
|
|
|
|
|
|
|
"YY/MM/DD", |
|
1898
|
|
|
|
|
|
|
"ignore" (do not import this column) |
|
1899
|
|
|
|
|
|
|
|
|
1900
|
|
|
|
|
|
|
Elements may also contain the numeric format codes defined by LibreOffice |
|
1901
|
|
|
|
|
|
|
at L<https://wiki.documentfoundation.org/Documentation/DevGuide/Spreadsheet_Documents#Filter_Options_for_the_CSV_Filter> |
|
1902
|
|
|
|
|
|
|
|
|
1903
|
|
|
|
|
|
|
B<Automatic format detection:> |
|
1904
|
|
|
|
|
|
|
Input CSV data is pre-scanned to auto-detect column formats |
|
1905
|
|
|
|
|
|
|
as much as possible. This usually works well as long as dates are |
|
1906
|
|
|
|
|
|
|
represented unambiguously, e.g. "2021-01-01" or "Jan 1, 2023". |
|
1907
|
|
|
|
|
|
|
|
|
1908
|
|
|
|
|
|
|
Specifically, this detects leading zeroes such as in U.S. Zip Codes, |
|
1909
|
|
|
|
|
|
|
and MM/DD/YY or DD/MM/YY dates when a DD happens to be more than 12. |
|
1910
|
|
|
|
|
|
|
|
|
1911
|
|
|
|
|
|
|
=item verbose => BOOL |
|
1912
|
|
|
|
|
|
|
|
|
1913
|
|
|
|
|
|
|
=back |
|
1914
|
|
|
|
|
|
|
|
|
1915
|
|
|
|
|
|
|
=head3 B<'binmode' Argument For Reading result CSVs> |
|
1916
|
|
|
|
|
|
|
|
|
1917
|
|
|
|
|
|
|
It is not possible to control the line-ending style in output CSV files, |
|
1918
|
|
|
|
|
|
|
but the following incantation will correctly read either DOS/Windows (CR,LF) |
|
1919
|
|
|
|
|
|
|
or *nix (LF) line endings properly, i.e. as a single \n: |
|
1920
|
|
|
|
|
|
|
|
|
1921
|
|
|
|
|
|
|
open my $fh, "<", $resulthash->{outpath}; |
|
1922
|
|
|
|
|
|
|
my $enc = $resulthash->{encoding}; |
|
1923
|
|
|
|
|
|
|
binmode($fh, ":raw:encoding($enc):crlf"); |
|
1924
|
|
|
|
|
|
|
|
|
1925
|
|
|
|
|
|
|
=head2 @sane_CSV_read_options |
|
1926
|
|
|
|
|
|
|
|
|
1927
|
|
|
|
|
|
|
=head2 @sane_CSV_write_options |
|
1928
|
|
|
|
|
|
|
|
|
1929
|
|
|
|
|
|
|
These contain options you will always want to use with |
|
1930
|
|
|
|
|
|
|
S<<< C<< Text::CSV->new() >> >>>. |
|
1931
|
|
|
|
|
|
|
Specifically, quotes and embedded newlines are handled correctly. |
|
1932
|
|
|
|
|
|
|
|
|
1933
|
|
|
|
|
|
|
Not exported by default. |
|
1934
|
|
|
|
|
|
|
|
|
1935
|
|
|
|
|
|
|
=head2 cx2let COLUMNINDEX |
|
1936
|
|
|
|
|
|
|
|
|
1937
|
|
|
|
|
|
|
=head2 let2cx LETTERCODE |
|
1938
|
|
|
|
|
|
|
|
|
1939
|
|
|
|
|
|
|
Functions which translate between spreadsheet-column |
|
1940
|
|
|
|
|
|
|
letter codes ("A", "B", etc.) and 0-based column indicies. |
|
1941
|
|
|
|
|
|
|
Not exported by default. |
|
1942
|
|
|
|
|
|
|
|
|
1943
|
|
|
|
|
|
|
=head2 filepath_from_spec EXPR |
|
1944
|
|
|
|
|
|
|
|
|
1945
|
|
|
|
|
|
|
=head2 sheetname_from_spec EXPR |
|
1946
|
|
|
|
|
|
|
|
|
1947
|
|
|
|
|
|
|
Functions which decompose strings containing a spreadsheet path and possibly sheetname |
|
1948
|
|
|
|
|
|
|
suffix, of the form "FILEPATH!SHEETNAME", "FILEPATH|||SHEETNAME", or "FILEPATH[SHEETNAME]". |
|
1949
|
|
|
|
|
|
|
C<sheetname_from_spec> returns C<undef> if the input does not have a |
|
1950
|
|
|
|
|
|
|
a sheetname suffix. |
|
1951
|
|
|
|
|
|
|
Not exported by default. |
|
1952
|
|
|
|
|
|
|
|
|
1953
|
|
|
|
|
|
|
=head2 form_spec_with_sheetname(PATH, SHEENAME) |
|
1954
|
|
|
|
|
|
|
|
|
1955
|
|
|
|
|
|
|
Composes a combined string in a "preferred" format (currently "PATH!SHEETNAME"). |
|
1956
|
|
|
|
|
|
|
Not exported by default. |
|
1957
|
|
|
|
|
|
|
|
|
1958
|
|
|
|
|
|
|
=head1 Testing if LibreOffice etc. is Installed |
|
1959
|
|
|
|
|
|
|
|
|
1960
|
|
|
|
|
|
|
=head2 $bool = can_cvt_spreadsheets(); |
|
1961
|
|
|
|
|
|
|
|
|
1962
|
|
|
|
|
|
|
=head2 $bool = sub can_extract_allsheets(); |
|
1963
|
|
|
|
|
|
|
|
|
1964
|
|
|
|
|
|
|
=head2 $bool = can_extract_named_sheet(); |
|
1965
|
|
|
|
|
|
|
|
|
1966
|
|
|
|
|
|
|
These feature-test functions return false if the corresponding operations |
|
1967
|
|
|
|
|
|
|
are not possible because LibreOffice (or, someday gnumeric) is not installed |
|
1968
|
|
|
|
|
|
|
or is an older version which does not have needed capabilities. |
|
1969
|
|
|
|
|
|
|
|
|
1970
|
|
|
|
|
|
|
=head2 $path = openlibreoffice_path(); |
|
1971
|
|
|
|
|
|
|
|
|
1972
|
|
|
|
|
|
|
Returns the detected path of I<soffice> (Apache Open Office or Libre Office) |
|
1973
|
|
|
|
|
|
|
or undef if not found. |
|
1974
|
|
|
|
|
|
|
|
|
1975
|
|
|
|
|
|
|
These are not exported by default. |
|
1976
|
|
|
|
|
|
|
|
|
1977
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
1978
|
|
|
|
|
|
|
|
|
1979
|
|
|
|
|
|
|
L<Spreadsheet::Edit> and L<Text::CSV> |
|
1980
|
|
|
|
|
|
|
|
|
1981
|
|
|
|
|
|
|
=cut |
|
1982
|
|
|
|
|
|
|
|