File Coverage

blib/lib/Spreadsheet/Edit/IO.pm
Criterion Covered Total %
statement 210 534 39.3
branch 59 310 19.0
condition 41 181 22.6
subroutine 45 84 53.5
pod 10 11 90.9
total 365 1120 32.5


line stmt bran cond sub pod time code
1             # License: http://creativecommons.org/publicdomain/zero/1.0/
2             # (CC0 or Public Domain). To the extent possible under law, the author,
3             # Jim Avera (email jim.avera at gmail dot com) has waived all copyright and
4             # related or neighboring rights to this document. Attribution is requested
5             # but not required.
6 3     3   22 use strict; use warnings FATAL => 'all'; use utf8;
  3     3   8  
  3     3   152  
  3         23  
  3         17  
  3         127  
  3         17  
  3         7  
  3         35  
7 3     3   100 use feature qw(say state lexical_subs current_sub);
  3         13  
  3         260  
8 3     3   19 no warnings qw(experimental::lexical_subs);
  3         7  
  3         2303  
9              
10             package Spreadsheet::Edit::IO;
11              
12             # Allow "use <thismodule. VERSION ..." in development sandbox to not bomb
13 3     3   20 { no strict 'refs'; ${__PACKAGE__."::VER"."SION"} = 1999.999; }
  3         5  
  3         267  
14             our $VERSION = '1000.008'; # VERSION from Dist::Zilla::Plugin::OurPkgVersion
15             our $DATE = '2023-09-19'; # DATE from Dist::Zilla::Plugin::OurDate
16              
17             # This module is derived from the old never-released Text:CSV::Spreadsheet
18              
19 3     3   25 use Exporter 'import';
  3         5  
  3         213  
20              
21             our @EXPORT = qw/convert_spreadsheet OpenAsCsv cx2let let2cx cxrx2sheetaddr
22             sheetname_from_spec filepath_from_spec
23             form_spec_with_sheetname/;
24              
25             our @EXPORT_OK = qw/can_cvt_spreadsheets can_extract_allsheets can_extract_named_sheet
26             openlibreoffice_path
27             @sane_CSV_read_options @sane_CSV_write_options/;
28              
29             # TODO: Provide "known_attributes" function ala Text::CSV::known_attributes()
30              
31 3     3   1612 use version ();
  3         6575  
  3         98  
32 3     3   19 use Carp;
  3         7  
  3         182  
33              
34 3     3   19 use File::Find ();
  3         6  
  3         67  
35 3     3   1707 use File::Copy ();
  3         8606  
  3         91  
36 3     3   1780 use File::Copy::Recursive ();
  3         14724  
  3         106  
37 3     3   28 use File::Glob qw/bsd_glob/;
  3         7  
  3         367  
38              
39 3     3   920 use Path::Tiny qw/path/;
  3         14629  
  3         168  
40              
41             # Path::Tiny OBVIATES NEED for many but we still need this
42 3     3   62 use File::Spec ();
  3         6  
  3         81  
43 3     3   619 use File::Spec::Functions qw/devnull tmpdir rootdir catdir catfile/;
  3         1021  
  3         249  
44              
45             # Still sometimes convenient...
46 3     3   25 use File::Basename qw(basename);
  3         7  
  3         172  
47              
48 3     3   1540 use File::Which qw/which/;
  3         3606  
  3         176  
49 3     3   1565 use URI::file ();
  3         35299  
  3         98  
50 3     3   24 use Guard qw(guard scope_guard);
  3         9  
  3         216  
51 3     3   24 use Fcntl qw(:flock :seek);
  3         5  
  3         431  
52 3     3   21 use Scalar::Util qw/blessed/;
  3         9  
  3         167  
53 3     3   22 use List::Util qw/none all notall first min max/;
  3         6  
  3         261  
54 3     3   35 use Encode qw(encode decode);
  3         6  
  3         166  
55 3     3   17 use File::Glob qw/bsd_glob GLOB_NOCASE/;
  3         22  
  3         215  
56 3     3   22 use Digest::MD5 qw/md5_base64/;
  3         7  
  3         189  
57 3     3   18 use Text::CSV ();
  3         16  
  3         170  
58             # DDI 5.025 is needed for Windows-aware qsh()
59 3     3   25 use Data::Dumper::Interp 5.025 qw/vis visq dvis dvisq ivis ivisq avis qsh qshlist u/;
  3         75  
  3         35  
60              
61 3         51 use Spreadsheet::Edit::Log qw/log_call fmt_call log_methcall fmt_methcall oops/,
62 3     3   1056 ':btw=IO${lno}:';
  3         9  
63             our %SpreadsheetEdit_Log_Options = (
64             is_public_api => sub{
65             $_[1][3] =~ /(?: ::|^ )(?: [a-z][^:]* | OpenAsCsv | ConvertSpreadsheet )$/x
66             },
67             );
68              
69             my $progname = path($0)->basename;
70              
71             sub _get_username(;$) {
72 4     4   9 my ($uid) = @_;
73 4   50     31 $uid //= eval{ $> } // -1; # default to EUID
  4   33     62  
74 4         10 state $answer = {};
75 4   66     28 return $answer->{$uid} //= do {
76             # https://stackoverflow.com/questions/12081246/how-to-get-system-user-full-name-on-windows-in-perl
77 3   33     2830 eval { getpwuid($uid) // $uid }
78             ||
79 3 0 0     7 ($^O =~ /MSWin/ && $uid == (eval{$>}//-1) && eval{ # untested...
  0   0     0  
      0        
      33        
80 0         0 require Win32API::Net;
81 0   0     0 Win32API::Net::UserGetInfo($ENV{LOGONSERVER}||'',Win32::LoginName(),10,my $info={});
82             $info->{fullName}
83 0         0 })
84             ||
85             "UID$uid"
86             };
87             }
88              
89              
90             # A private Libre/Open Office profile dir is needed to avoid conflicts
91             # with interactive sessions, see
92             # https://ask.libreoffice.org/en/question/290306/how-to-start-independent-lo-instance-process
93             #
94             # We use a persistent profile dir shared among processes for a given user
95             # (actually one for each unique external tool which needs one).
96             # Sharing is okay because we get an exclusive lock before actually using it.
97             state $profile_parent_dir = do{ # also used for lockfile
98             my $user = _get_username();
99             my $dname = __PACKAGE__."_${user}_profileparent";
100             $dname =~ s/::/-/g;
101             $dname =~ s/[^-\w.:]/_/g;
102             (my $path = path(File::Spec->tmpdir)->child($dname))->mkpath;
103             $path # Path::Tiny
104             };
105             sub _get_tool_profdir($) {
106 0     0   0 my ($tool_path) = @_;
107 0         0 my $fingerprint = _file_fingerprint($tool_path);
108 0         0 (my $toolname = path($tool_path)->basename(qw/\.\w+$/)) =~ s/[^-\w.:]/_/g;
109 0         0 my $path = $profile_parent_dir->child("${toolname}_$fingerprint");
110 0         0 $path->mkpath;
111 0         0 $path
112             }
113              
114             # Prevent concurrent document conversions.
115             # LO & OO can't handle concurrent access to the same profile.
116             my $locked_fh;
117             sub _get_exclusive_lock($) { # returns lock object
118 0     0   0 my $opts = shift;
119 0         0 my $lockfile_path = $profile_parent_dir->child("LOCKFILE")->canonpath;
120 0         0 my $sleeptime = 1;
121 0         0 my $lock_fh;
122 0         0 while (! defined $lock_fh) {
123             #warn "$$ : ### AAA open $lockfile_path ...\n";
124 0 0       0 open $lock_fh, "+>>", $lockfile_path or die $!;
125             #warn "$$ : ### AA2 open succeeded.\n";
126 0         0 eval { chmod 0666, $lock_fh; }; # sometimes not implemented
  0         0  
127             #warn "$$ : ### AA3 flock ...\n";
128 0 0       0 if (! flock($lock_fh, LOCK_EX|LOCK_NB)) {
129             #warn "$$ : ### AA4 flock FAILED\n";
130 0 0       0 seek($lock_fh, 0, SEEK_SET) or die;
131 0         0 my @lines = <$lock_fh>;
132 0 0       0 close($lock_fh) or die "close:$!"; $lock_fh = undef;
  0         0  
133             #warn "$$ : ### AA6 fh closed...\n";
134 0   0     0 my $owner = $lines[-1] // ""; # pid NNN (progname)
135 0 0       0 { my ($pid) = ($owner =~ /pid (\d+)/) or last;
  0         0  
136 0 0       0 my @s = stat("/proc/$pid") or last;
137 0         0 $owner = _get_username($s[4])." ".$owner;
138             }
139 0 0       0 my $ownermsg = $owner ? " held by $owner" : "";
140             # Carp::longmess ...
141             warn ">> ($$) Waiting for exclusive lock${ownermsg}...\n",
142             " $lockfile_path\n"
143 0 0       0 unless $opts->{silent};
144 0         0 sleep $sleeptime;
145             } else {
146 0         0 $locked_fh = $lock_fh;
147 0 0       0 seek($lock_fh, 0, SEEK_END) or die;
148 0         0 print $lock_fh "pid $$ ($progname)\n"; # always appends anyway on *nix
149             }
150             }
151 0         0 $opts->{lockfile_fh} = $lock_fh;
152             #warn "$$ : ### GOT LOCK\n";
153             }
154             END{
155 3 50   3   883 if (defined $locked_fh) {
156 0         0 flock($locked_fh, LOCK_UN);
157 0         0 close($locked_fh);
158 0         0 $locked_fh = undef;
159 0         0 warn "Did emergency unlock!\n";
160             }
161             #else { warn "(emergency unlock not needed)\n"; }
162             }
163             sub _release_lock($) {
164 0     0   0 my $opts = shift;
165 0   0     0 my $fh = delete($opts->{lockfile_fh}) // oops;
166 0 0       0 oops unless $fh == $locked_fh;
167             #seek($fh, 0, SEEK_SET) or die;
168             #my @x = (<$fh>);
169             #seek($fh, 0, SEEK_SET) or die;
170             #warn dvis "$$ : Lockfile contains: @x\n";
171             ##warn "$$ : ###BBB stalling before unlock...\n"; sleep 3;
172 0         0 truncate($fh,0);
173 0 0       0 flock($fh, LOCK_UN) or die "flock UN: $!";
174 0         0 close $fh;
175 0         0 $locked_fh = undef;
176             #warn "$$ : ###BB0 unlocked and closed.\n";
177             }
178              
179             # Libre Office text converter "charset" numbers
180             my %LO_charsets = (
181             'WINDOWS1252' => 1, 'WINLATIN1' => 1,
182             'APPLEWESTERN' => 2,
183             'DOS/OS2437' => 3,
184             'DOS/OS2850' => 4,
185             'DOS/OS2860' => 5,
186             'DOS/OS2861' => 6,
187             'DOS/OS2863' => 7,
188             'DOS/OS2865' => 8,
189             'SYSTEM' => 9, 'SYSTEMDDEFAULT' => 9,
190             'SYMBOL' => 10,
191             'ASCII' => 11,
192             'ISO88591' => 12,
193             'ISO88592' => 13,
194             'ISO88593' => 14,
195             'ISO88594' => 15,
196             'ISO88595' => 16,
197             'ISO88596' => 17,
198             'ISO88597' => 18,
199             'ISO88598' => 19,
200             'ISO88599' => 20,
201             'ISO885914' => 21,
202             'ISO885915' => 22,
203             'OS2737' => 23,
204             'OS2775' => 24,
205             'OS2852' => 25,
206             'OS2855' => 26,
207             'OS2857' => 27,
208             'OS2862' => 28,
209             'OS2864' => 29,
210             'OS2866' => 30,
211             'OS2869' => 31,
212             'WINDOWS874' => 32,
213             'WINDOWS1250' => 33, 'WINLATIN2' => 33,
214             'WINDOWS1251' => 34,
215             'WINDOWS1253' => 35,
216             'WINDOWS1254' => 36,
217             'WINDOWS1255' => 37,
218             'WINDOWS1256' => 38,
219             'WINDOWS1257' => 39,
220             'WINDOWS1258' => 40,
221             'APPLEARABIC' => 41,
222             'APPLECENTRALEUROPEAN' => 42,
223             'APPLECROATIAN' => 43,
224             'APPLECYRILLIC' => 44,
225             'APPLEDEVANAGARI' => 45,
226             'APPLEFARSI' => 46,
227             'APPLEGREEK' => 47,
228             'APPLEGUJARATI' => 48,
229             'APPLEGURMUKHI' => 49,
230             'APPLEHEBREW' => 50,
231             'APPLEICELANDIC' => 51,
232             'APPLEROMANIAN' => 52,
233             'APPLETHAI' => 53,
234             'APPLETURKISH' => 54,
235             'APPLEUKRAINIAN' => 55,
236             'APPLECHINESESIMPLIFIED' => 56,
237             'APPLECHINESETRADITIONAL' => 57,
238             'APPLEJAPANESE' => 58,
239             'APPLEKOREAN' => 59,
240             'WINDOWS932' => 60,
241             'WINDOWS936' => 61,
242             'WINDOWSWANSUNG949' => 62,
243             'WINDOWS950' => 63,
244             'SHIFTJIS' => 64,
245             'GB2312' => 65,
246             'GBT12345' => 66,
247             'GBK' => 67, 'GB231280' => 67,
248             'BIG5' => 68,
249             'EUCJP' => 69,
250             'EUCCN' => 70,
251             'EUCTW' => 71,
252             'ISO2022JP' => 72,
253             'ISO2022CN' => 73,
254             'KOI8R' => 74,
255             'UTF7' => 75,
256             'UTF8' => 76,
257             'ISO885910' => 77,
258             'ISO885913' => 78,
259             'EUCKR' => 79,
260             'ISO2022KR' => 80,
261             'JIS0201' => 81,
262             'JIS0208' => 82,
263             'JIS0212' => 83,
264             'WINDOWSJOHAB1361' => 84,
265             'GB18030' => 85,
266             'BIG5HKSCS' => 86,
267             'TIS620' => 87,
268             'KOI8U' => 88,
269             'ISCIIDEVANAGARI' => 89,
270             'JAVAUTF8' => 90,
271             'ADOBESTANDARD' => 91,
272             'ADOBESYMBOL' => 92,
273             'PT154' => 93,
274             'UCS4' => 65534,
275             'UCS2' => 65535,
276             );
277              
278             =for Pod::Coverage _name2LOcharsetnum
279             =cut
280              
281             sub _name2LOcharsetnum($) {
282 0     0   0 my ($enc) = @_;
283 0         0 local $_ = uc $enc;
284 0         0 while (! $LO_charsets{$_}) {
285             # successively remove - and other special characters
286 0 0       0 s/\W//a or confess "LO charset: Unknown encoding name '$enc'";
287             }
288 0         0 $LO_charsets{$_}
289             }
290              
291             # convert between 0-based index and spreadsheet column letter code.
292             # Default argument is $_
293             sub cx2let(_) {
294 0     0 1 0 my $cx = shift;
295 0         0 my $ABC="A"; ++$ABC for (1..$cx);
  0         0  
296 0         0 return $ABC
297             }
298             sub let2cx(_) {
299 0     0 1 0 my $ABC = shift;
300 0         0 my $n = ord(substr($ABC,0,1,"")) - ord('A');
301 0         0 while (length $ABC) {
302 0         0 my $letter = substr($ABC,0,1,"");
303 0         0 $n = (($n+1) * 26) + (ord($letter) - ord('A'));
304             }
305 0         0 return $n;
306             }
307             sub cxrx2sheetaddr($$) { # (1,99) -> "B100"
308 0     0 0 0 my ($cx, $rx) = @_;
309 0         0 return cx2let($cx) . ($rx + 1);
310             }
311              
312             =for Pod::Coverage cxrx2sheetaddr oops btw
313             =cut
314              
315             our @sane_CSV_read_options = (
316             # Text::CSV pod says to not specify 'eol' to allow embedded newlines,
317             # and to automatically handle "\n", "\r", or "\r\n".
318             #eol => $/,
319             binary => 1, # Allow reading embedded newlines & unicode etc.
320             sep_char => ",",
321             quote_char => '"',
322             escape_char => '"', # Embedded "s appear as ""
323             allow_whitespace => 0, # Preserve leading & trailing white space
324             auto_diag => 2, # die on errors
325             );
326             our @sane_CSV_write_options = (
327             eol => $/, # Necessary when WRITING csv files
328             binary => 1,
329             sep_char => ",",
330             quote_char => '"',
331             escape_char => '"', # Embedded "s appear as ""
332             allow_whitespace => 0, # Preserve leading & trailing white space
333             auto_diag => 2, # die on errors
334             );
335              
336             my %Saved_Sigs;
337             sub _sighandler {
338 0 0 0 0   0 if (! $Saved_Sigs{$_[0]} or $Saved_Sigs{$_[0]} eq 'DEFAULT') {
339             # The user isn't catching this, so the process will abort without
340             # running destructors: Call exit instead
341 0         0 warn "($$)".__PACKAGE__." caught signal $_[0], exiting\n";
342 0         0 Carp::cluck "($$)".__PACKAGE__." caught signal $_[0], exiting\n";
343 0         0 exit 1;
344             }
345 0         0 $SIG{$_[0]} = $Saved_Sigs{$_[0]};
346 0         0 kill $_[0], $$;
347 0         0 oops "Default (or user-defined) sig $_[0] action was to ignore!";
348             }
349             sub _signals_guard() {
350 0   0 0   0 %Saved_Sigs = ( map{ ($_ => ($SIG{$_} // undef)) } qw/HUP INT QUIT TERM/ );
  0         0  
351 0         0 $SIG{HUP} = $SIG{INT} = $SIG{QUIT} = $SIG{TERM} = \&_sighandler;
352 0     0   0 return guard { @SIG{keys %Saved_Sigs} = (values %Saved_Sigs) }
353 0         0 }
354              
355             # Create a probably-unique fingerprint for a particular file
356             sub _file_fingerprint($) {
357 0     0   0 my $path = shift;
358 0         0 my $ctx = Digest::MD5->new;
359 0         0 $ctx->add($_) for((stat($path))[0,1,9]); # dev,ino,mtime
360 0         0 substr($ctx->b64digest,0,6)
361             }
362              
363             # Find LibreOffice, or failing that OpenOffice
364             our $OLpath_answer = $ENV{SPREADSHEET_EDIT_LOPATH};
365             sub openlibreoffice_path() {
366             return $OLpath_answer if $OLpath_answer;
367             unless ($ENV{SPREADSHEET_EDIT_IGNPATH}) {
368             foreach my $short_name (qw(libreoffice loffice localc)) {
369             if ($OLpath_answer = which($short_name)) {
370             return( ($OLpath_answer=path($OLpath_answer)->canonpath) );
371             }
372             }
373             }
374             # Search for an installation. On Windows it will usually be
375             # C:\Program Files\LibreOffice\...
376             # On *nix, a local/isolated install (i.e. the result of extracting files
377             # from a .deb or other archive) will be
378             # somwehere/opt/libreofficeA.B/...
379             # and "..." is the same standard hierarchy on all platforms.
380             #
381             # If multiple are found try to pick the "latest".
382             my sub _cmp_subpaths($$) {
383             my ($sp1, $sp2) = @_;
384             oops if !defined($sp1);
385             return 1 if !defined($sp2);
386             # Use longest version in the (sub-)path, e.g. "4.4.1/opt/openoffice4/..."
387             my (@v1) = sort { length($a) <=> length($b) } ($sp1 =~ /(\d[.\da-z]*)/ag);
388             my (@v2) = sort { length($a) <=> length($b) } ($sp2 =~ /(\d[.\da-z]*)/ag);
389             my $v1 = $v1[-1]//0;
390             my $v2 = $v2[-1]//0;
391             if ($v1 =~ s/alpha/0/) {
392             return -1 unless $v2 =~ s/alpha/0/;
393             }
394             if ($v2 =~ /alpha/) {
395             return +1
396             }
397             version->parse($v1) <=> version->parse($v2)
398             }
399              
400             # I tried just doing File::Glob::bsd_glob('/*/*/*/opt/libre*/program') but
401             # it silently failed even though the same glob works from the shell. Mmff...
402 3     3   28 no warnings FATAL => 'all';
  3         8  
  3         38922  
403             state $is_MSWin = ($^O eq "MSWin32");
404             my (@search_dirs, $searchfor_re, $maxdepth);
405             if ($is_MSWin) {
406             @search_dirs = ("C:\\Program Files","C:\\Program Files (x86)");
407             $searchfor_re = qr/^Program Files/;
408             $maxdepth = 1;
409             # depth: C:\Program Files\libreofficeXXX/program/
410             # 1
411             } else {
412             @search_dirs = (File::Spec->rootdir());
413             push @search_dirs, $ENV{HOME} if $ENV{HOME};
414             $maxdepth = 4;
415             $searchfor_re = qr/^opt$/;
416             # depth: /*/*/<unpackparent>/opt/libreofficeXXX/program/
417             # 1 2 3 4
418             }
419              
420             my $debug = $ENV{SPREADSHEET_EDIT_FINDDEBUG};
421             my sub _Findvarsmsg() {
422             if (u($_) eq u($File::Find::name) && u($_) eq u($File::Find::fullname)) {
423             return qsh($_)."\n"
424             }
425             if (u($_) eq u($File::Find::name)) {
426             return "\$_=name=".qsh($_)." -> ".qsh($File::Find::fullname)."\n";
427             }
428             "\$_=".qsh($_)." name=".qsh($File::Find::name)." fullname=".qsh($File::Find::fullname)."\n";
429             }
430              
431             my %results;
432             $ENV{SPREADSHEET_EDIT_NOLOSEARCH} or
433             File::Find::find(
434             { wanted => sub{
435             # Undef fullname OR invalid "_" filehandle implies a broken symlink,
436             # see https://github.com/Perl/perl5/issues/21122
437             # Zero size on *nix implies /proc or something similar; do not enter.
438             # File::Find::fullname unreadable implies followed link to inaccessable
439             # (The initial "_" stat may be invalid, so "-l _" is useless)
440 4865766     4865766   16926603 $! = 0;
441             # https://github.com/Perl/perl5/issues/21143
442 4865766         7671804 my $fullname = $File::Find::fullname;
443 4865766 50 33     13641099 if (!defined($fullname) && $is_MSWin) {
444 0 0       0 warn "# _ MSWin undef fullname! ",_Findvarsmsg() if $debug;
445 0         0 stat($_); # lstat was not done. Grr...
446 0         0 $fullname = $File::Find::name;
447 0 0       0 unless (-d _) {
448 0         0 $File::Find::prune = 1; # in case it really is a dir
449 0         0 return;
450             }
451             } else {
452 4865766 100 100     16813158 unless (-d _ or -l _) {
453 4487594 100       10388015 warn "# _ notdir/symlink: ",_Findvarsmsg() if $debug;
454 4487594         54486706 $File::Find::prune = 1; # in case it really is
455 4487594         116840567 return;
456             }
457             }
458 378172 100 33     14702367 if (
      66        
      66        
      66        
      66        
      66        
      66        
      100        
459             !defined($fullname) # broken link, per docs
460             || (! -r _) || (! -x _) # unreadable item or invalid "_" handle
461             # https://github.com/Perl/perl5/issues/21122
462             || (!$is_MSWin && (stat(_))[7] == 0) # zero size ==> /proc etc.
463             || /\$/ # $some_windows_special_thing$
464             || ! -r $fullname # presumably a symlink to unreadable
465             || ! -x _ # or unsearchable dir
466             || m#^/snap/(?!.*ffice)# # snap other than e.g. /snap/libreoffice
467             || m#^/(proc|dev|sys|tmp|boot|run|lost+found|usr/(include|src))$#
468             ) {
469 646 100       2310 warn "# PRUNING ",_Findvarsmsg() if $debug;
470 646         8617 $File::Find::prune = 1;
471             return
472 646         35719 }
473 377526 100       1707461 warn "# DIR: ",_Findvarsmsg() if $debug;
474             # Maximum depth: /*/*/<unpackparent>/opt/libreofficeXXX/program/
475 377526         5659849 my $path = path($_);
476 377526         14331437 my $depth = scalar(() = $path->stringify =~ m#(/)#g);
477 377526 100       14159501 if (basename($_) =~ $searchfor_re) { # ^opt$ or ^Program Files
478 6         24 my $prefix = path($_)->parent->parent;
479 6         1095 for my $o_l (qw/libre open/) {
480 12         73 my $pattern
481             = path($_)->child("${o_l}*/program/soffice*")->canonpath;
482             # eval because I'm suspicious of the glob on Windows
483 12         914 my @hits; eval{ @hits = sort +bsd_glob($pattern, GLOB_NOCASE) };
  12         27  
  12         573  
484 12 50       61 if (@hits) {
485             # On windows, use soffice.com not .exe because it writes messages
486             # to stdout not a window. See https://help.libreoffice.org/7.5/en-GB/text/shared/guide/start_parameters.html?&DbPAR=SHARED&System=WIN
487             my $path = (first{ /soffice\.com$/ } @hits) ||
488 0   0     0 (first{ /soffice$/ } @hits);
489 0 0       0 if ($path) {
490 0 0       0 $prefix->subsumes($path) or oops dvis '$prefix $path';
491 0         0 my $subpath = path($path)->relative($prefix);
492 0 0       0 if (_cmp_subpaths($subpath, $results{$o_l}{subpath}) >= 0) {
493 0         0 @{$results{$o_l}}{qw/path subpath/} = ($path, $subpath);
  0         0  
494             # We found where installations are, don't look deeper
495 0         0 $maxdepth = $depth;
496             }
497             }
498             }
499 12 50       52 else { btw dvis '##glob failed: $pattern\n$@' if $@; }
500             }
501             }
502 377526 100       36142670 if ($depth == $maxdepth) {
    50          
503 2086 100       6066 warn "# pruning at maxdepth $depth ",qsh($_),"\n" if $debug;
504 2086         20284 $File::Find::prune = 1;
505 2086         46991 return;
506             }
507 0         0 elsif ($depth > $maxdepth) { oops dvis '$depth $maxdepth $_' }
508             },
509             follow_fast => 1,
510             follow_skip => 2,
511             dangling_symlinks => 0,
512             no_chdir => 1
513             },
514             @search_dirs
515             );
516             $OLpath_answer = path(
517             $results{libre}{path} // $results{open}{path}
518             || (!$ENV{SPREADSHEET_EDIT_IGNPATH} && which("soffice")) # installed OO?
519             || return(undef)
520             )->realpath->canonpath
521             }#openlibreoffice_path
522              
523             sub _openlibre_features() {
524 1     1   4 state $hash;
525 1 50       8 return $hash if defined $hash;
526 1   50     19 my $prog = openlibreoffice_path() // return(($hash={ available => 0 }));
527 0         0 my $raw_version;
528             # This is gross but fast and works in recent versions of LO
529 0 0       0 if (my $fh = eval{ path($prog)->realpath->parent->child("types/offapi.rdb")
  0         0  
530             ->filehandle("<",":raw")} ) {
531 0         0 my $octets; sysread $fh, $octets, 100;
  0         0  
532 0 0       0 if ($octets =~ /Created by LibreOffice (\d+\.\d+\.\w+)/) {
533 0         0 $raw_version = $1;
534             }
535             }
536 0 0       0 unless ($raw_version) {
537 0 0       0 if (qx/$prog --version 2>&1/ =~ /Libre.*? (\d+\.\d+\.\w+)/) {
538 0         0 $raw_version = $1;
539             } else {
540 0         0 warn "$prog --version DID NOT WORK\n";
541             }
542             }
543 0 0       0 unless ($raw_version) {
544 0         0 warn "WARNING: Could not determine version of $prog\n";
545 0         0 $raw_version = "999.01";
546             }
547 0         0 my $version = version->parse("v$raw_version");
548 0         0 $hash = {
549             available => 1,
550             # LibreOffice 7.2 allows extracting all sheets at once
551             allsheets => ($version >= version->parse("v7.2")),
552             # ...but not yet extracting a single sheet by name.
553             # https://bugs.documentfoundation.org/show_bug.cgi?id=135762#c24
554             named_sheet => 0,
555             # Supported output formats are too many to list
556             ousuf_any => 1,
557             raw_version => $raw_version, version => "$version",
558             }
559             }
560              
561 0     0   0 sub _openlibre_supports_allsheets() { _openlibre_features()->{allsheets} }
562 0     0   0 sub _openlibre_supports_named_sheet() { _openlibre_features()->{named_sheet} }
563 0     0   0 sub _openlibre_supports_writing($) { _openlibre_features()->{available} }
564              
565 1     1   279 sub _ssconvert_features() { return { availble => 0 } } # TODO add back?
566 0     0   0 sub _ssconvert_supports_allsheets() { _ssconvert_features()->{allsheets} }
567 0     0   0 sub _ssconvert_supports_named_sheet() { _ssconvert_features()->{named_sheet} }
568 0     0   0 sub _ssconvert_supports_writing($) { _ssconvert_features()->{available} }
569              
570             # These allow users (e.g. App-diff_spreadsheets tests) to determine
571             # if external tool(s) are available to convert between spreadsheet formats
572             # or to/from csv (CSVs are supported directly so can always be used)
573             # Currently used by t/io.pl to skip tests
574             sub can_cvt_spreadsheets() {
575             _openlibre_features()->{available} || _ssconvert_features()->{availble}
576 1 50   1 1 2523806 }
577             sub can_extract_allsheets() {
578 0 0   0 1 0 _openlibre_supports_allsheets() || _ssconvert_supports_allsheets()
579             }
580             sub can_extract_named_sheet() {
581 0 0 0 0 1 0 can_extract_allsheets() # used to emulate
582             || _openlibre_supports_named_sheet() || _ssconvert_supports_named_sheet()
583             }
584              
585             =for Pod::Coverage can_cvt_spreadsheets can_extract_allsheets
586             -for Pod::Coverage can_extract_named_sheet
587             =cut
588              
589             sub _runcmd($@) {
590 0     0   0 my ($opts, @cmd) = @_;
591 0         0 my $guard = _signals_guard;
592             # This used to fork & exec but that blows up on MSWin32 because the child
593             # pseudo-process executes all END{} blocks everywhere after "exec"
594 0         0 my $redirs = "";
595 0 0       0 if ($opts->{suppress_stderr}) {
596 0         0 $redirs .= " 2>".devnull();
597             }
598 0 0       0 if ($opts->{stdout_to_stderr}) {
599 0         0 confess "Not portable";
600 0         0 $redirs .= " 1>&2";
601             }
602 0 0       0 if ($opts->{stderr_to_stdout}) {
603 0         0 confess "Not portable";
604 0         0 $redirs .= " 2>&1";
605             }
606 0 0       0 if ($opts->{suppress_stdout}) {
607 0         0 $redirs .= " >".devnull();
608             }
609 0         0 my $cmdstr = join(" ", map{qsh} @cmd) . $redirs;
  0         0  
610 0 0       0 if ($redirs) {
611 0         0 foreach (@cmd) {
612 0 0       0 confess "Can not portably pass argument '$_'" if /["']/;
613             }
614 0 0       0 warn "> $cmdstr\n" if $opts->{verbose};
615 0         0 system $cmdstr;
616             } else {
617 0 0       0 warn "> $cmdstr\n" if $opts->{verbose};
618 0         0 system @cmd;
619             }
620 0         0 return $?
621             }
622              
623             sub _fmt_outpath_contents($) {
624 0   0 0   0 my $outpath = $_[0]->{outpath} // oops;
625 0 0       0 return "(outpath does not exist)" unless -e $outpath;
626 0 0       0 return "(outpath is a file)" if -f $outpath;
627 0 0       0 return "(outpath is a STRANGE OBJECT)" unless -d $outpath;
628             "\n outpath contains: "
629 0         0 .join(", ",map{qsh basename $_} path($outpath)->children);
  0         0  
630             }
631              
632             my $tempdir;
633             sub _create_tempdir_if_needed($) {
634 2     2   3 my $opts = shift;
635             # Keep a per-process persistent temp directory, deleted at process exit.
636             # It contains result files when the user did not specify {outpath},
637             # plus a cache of as-yet unrequested sheet .csv files, used when the
638             # external tool can only extract all sheets, not a single sheet by name:
639             #
640             # tempdir/
641             # <ifbase>_<sig>.xlsx etc. # single file returned to user
642             # <ifbase>_<sig>/*.csv # directory returned to user
643             # <ifbase>_<sig>_csvcache/*.csv
644             #
645             # <ifbase> is derived from the intput file name, and <sig> is a fingerprint
646             # based on input file's dev, inode, and modification timestamp.
647             #
648 2   66     8 $tempdir //= do{
649             #(my $template = __PACKAGE__."_XXXXX") =~ s/::/-/g;
650             #Path::Tiny->tempdir($template)
651 1         3 my $pid = $$;
652 1         4 my $user = _get_username();
653 1         10 (my $dname = __PACKAGE__."_${user}_${pid}_tempdir") =~ s/::/-/g;
654 1         24 (my $path = path(File::Spec->tmpdir)->child($dname))->mkpath;
655 1         380 $path
656             };
657             }
658 3 100   3   5743 END{ $tempdir->remove_tree if $tempdir; }
659              
660             # Compose a unique path under $tempdir.
661             # This is *not* a "tempfile" or "tempdir" object which auto-destructs,
662             # in fact it does not even exist yet and we don't know here which it will be.
663             # Either the user must remove it when they are done with it, or it will
664             # be removed when $tempdir is removed at process exit.
665             #
666             sub _path_under_tempdir($@) {
667 0     0   0 my $opts = shift;
668             my %args = (
669 0         0 words => [$opts->{ifbase}, $opts->{sheetname}],
670             @_
671             );
672 0         0 my $bname = join "_", grep{defined} @{$args{words}};
  0         0  
  0         0  
673             # Collisions occur when recursing to emulate Extract-by-name,
674             # or if the user repeatedly reads the same thing, etc.
675 0         0 state $seqnums = {};
676 0 0       0 if ($seqnums->{$bname}++) {
677 0         0 $bname .= "_".$seqnums->{$bname}; # append unique sequence number
678             }
679 0 0       0 $bname .= ".$args{suf}" if $args{suf};
680 0         0 return $tempdir->child($bname);
681             }
682              
683             # Compose csv cache subdir path
684             sub _cachedir($) {
685 0     0   0 my $opts = shift;
686 0         0 _path_under_tempdir($opts,words => [$opts->{ifbase}, "csvcache"]);
687             }
688              
689             ## Copy an ephemeral temp file to a path under tempdir if needed
690             #sub _make_file_permanent($$) {
691             # my ($opts, $path) = @_;
692             # if (eval{ $path->cached_temp }) { # didn't throw
693             # my $suf = $path->basename =~ /\.(\w+)$/a ? $1 : undef;
694             # my $newpath = _path_under_tempdir($opts, suf => $suf);
695             # $path->move($newpath);
696             # return $newpath
697             # } else {
698             # return $path
699             # }
700             #}
701              
702             sub _convert_using_openlibre($$$) {
703 0     0   0 my ($opts, $src, $dst) = @_;
704 0 0   0   0 oops unless all{ $opts->{$_} } qw/cvt_from cvt_to/;
  0         0  
705 0 0 0     0 oops if $opts->{allsheets} && ! _openlibre_supports_allsheets();
706 0 0 0     0 oops if $opts->{sheetname} && ! _openlibre_supports_named_sheet();
707 0         0 my $debug = $opts->{debug};
708              
709 0   0     0 my $prog = openlibreoffice_path() // oops;
710              
711 0         0 my $saved_UserInstallation = $ENV{UserInstallation};
712             # URI format is file://server/path where 'server' is empty. "file://path" is
713             # "never correct, but is often used" en.wikipedia.org/wiki/File_URI_scheme
714             # Correct examples: file::///tmp/something file:///C:/somewhere
715 0         0 $ENV{UserInstallation} = URI::file->new(_get_tool_profdir($prog)->canonpath);
716 0 0       0 warn "Temporarily set UserInstallation=$ENV{UserInstallation}\n" if $debug;
717             scope_guard {
718 0 0   0   0 if (defined $saved_UserInstallation) {
719 0         0 $ENV{UserInstallation} = $saved_UserInstallation;
720             } else {
721             delete $ENV{UserInstallation}
722 0         0 }
723 0         0 };
724              
725             # The --convert-to argument is "suffix:filtername:filteropts"
726              
727             # I think (not certain) that we can only specify the encoding of CSV files,
728             # either as input or output; .xlsx and .ods spreadsheets (which are based
729             # on XML) could in principle use any encoding internally, but I'm not sure
730             # we can control that, nor should anyone ever need to.
731              
732             # REFERENCES:
733             # https://help.libreoffice.org/7.5/en-US/text/shared/guide/start_parameters.html?&DbPAR=SHARED&System=UNIX
734             # http://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options
735             # https://wiki.documentfoundation.org/Documentation/DevGuide/Spreadsheet_Documents#Filter_Options_for_the_CSV_Filter
736              
737             # I think we never want to specify the filter unless we have parameters
738             # for it. Currently that is only for csv.
739             # If no filter is specified, the suffix (e.g. 'ods') should be enough
740 0         0 state $suf2ofilter = {
741             csv => "Text - txt - csv (StarCalc)",
742             txt => "Text - txt - csv (StarCalc)",
743             #xls => "MS Excel 97",
744             #xlsx => "Calc MS Excel 2007 XML",
745             #ods => "calc8",
746             };
747              
748 0   0     0 my $ifilter = $opts->{soffice_infilter} //= do{
749 0 0       0 if ($opts->{cvt_from} eq "csv") {
750 0 0       0 my $filter_name = $suf2ofilter->{$opts->{cvt_from}} or oops;
751 0         0 my $enc = $opts->{input_encoding};
752 0         0 my $charset = _name2LOcharsetnum($enc); # dies if unknown enc
753 0         0 my $colformats = "";
754 0 0       0 if (my $cf = $opts->{col_formats}) {
755 0 0       0 $cf = [split /[\/,]/, $cf] unless ref($cf); # fmtA/fmtB/...
756 0         0 for (my $ix=0; $ix <= $#$cf; $ix++) {
757 0   0     0 local $_ = $cf->[$ix] // 1;
758 0 0 0     0 m#^([123459]|10)$#
      0        
      0        
      0        
      0        
      0        
      0        
759             || s#^standard$#1#i
760             || s#^text$#2#i
761             || s#^M+/D+/Y+$#3#i
762             || s#^D+/M+/Y+$#4#i
763             || s#^Y+/M+/D+$#5#i
764             || s#^ignore$#9#i
765             || s#^US.*English$#10#i
766             || croak "Unknown format code '$_' in {col_formats}";
767 0 0       0 $colformats .= "/" if $colformats;
768 0         0 $colformats .= ($ix+1)."/$_";
769             }
770             }
771             $filter_name.":"
772             # Tokens 1-4: FldSep=',' TxtDelim='"' Charset FirstLineNum
773             #. "44,34,$charset,1"
774             . ord($opts->{sep_char}//",").","
775 0   0     0 . ord($opts->{quote_char}//'"').","
      0        
776             . "$charset,1"
777             # Token 5: Cell format codes:
778             # If variable-width cells (the norm): colnum/fmt/colnum/fmt...
779             # colnum: 1-based column number
780             # fmt: 1=Std 2=Text 3=MM/DD/YY 4=DD/MM/YY 5=YY/MM/DD 6-8 unused
781             # 9=ignore field (do not import),
782             # 10=US-English content (e.g. 3.14 not 3,14)
783             # (I'm guessing 1=Std means use current lang [or per Tok 6?])
784             # If fixed-width cells... [something else]
785             . ",$colformats"
786             # Token 6: MS-LCID Language Id; 0 or omitted means UI language
787             . "," # default: false
788             # Token 7: On input: true => Quoted cells are always read a 'text',
789             # effectively disabling Token 8. This must be false to recognize dates
790             # like "Jan 1, 2000" which by necessity must be quoted for the comma,
791             # but will **CORRUPT** zip codes with leading zeroes unless
792             # col_formats overrides (which it does now by default).
793             .",false" # default: false
794             # Token 8: on input: "Detect Special Numbers", i.e. date or time values
795             # in human form, numbers in scientific (expondntial) notation etc.
796             # If false, ONLY decimal numbers (thousands separators ok).
797             .",true" # default: false (for import)
798             # Tokens 9-10: not used on import
799             .",,"
800             # Token 11: Remove spaces; trim leading & trailing spaces when reading
801             ."," # default: false
802             # Token 12: not use on import
803             .","
804             # Token 13: Import "=..." as formulas instead of text?
805             ."," # default: false i.e. do not recognize formulas
806             # Token 14: "Automatically detected since LibreOffice 7.6" [BOM?]
807             .","
808             }
809             else {
810             undef
811 0         0 }
812             };
813              
814 0   0     0 my $ofilter = $opts->{soffice_outfilter} //= do{
815             # OutputFilterName[:paramtoken,paramtoken,...]
816 0 0       0 if ($opts->{cvt_to} eq "csv") {
817 0 0       0 my $filter_name = $suf2ofilter->{$opts->{cvt_to}} or oops;
818 0         0 my $enc = $opts->{output_encoding};
819 0         0 my $charset = _name2LOcharsetnum($enc); # dies if unknown enc
820             $filter_name.":"
821             # Tokens 1-4: FldSep=, TxtDelim=" Charset FirstLineNum
822             #."44,34,$charset,1"
823             . ord($opts->{sep_char}//",").","
824             . ord($opts->{quote_char}//'"').","
825             . "$charset,1"
826             # Token 5: Cell format codes. Only used for import? (see above)
827             # What about fixed-width?
828             .","
829             # Token 6: Language identifier (uses Microsoft lang ids)
830             # 1033 means US-English (omitted => use UI's language)
831             .","
832             # Token 7: QuoteAllTextCells
833             # *** true will "quote" even single-bareword cells, which looks
834             # *** bad and makes t/ tests messier, but preserves information
835             # *** that such cells were not numbers or dates, etc. This ensures
836             # *** that Zip codes, etc. with leading zeroes won't be corrupted
837             # *** if converted back into a spreadsheet
838             # Option #1: Specify true to quote all cells on export, then post-process
839             # the result to un-quote obviously safe cells (for yet more overhead).
840             # Option #2: Specify false, and assume the resulting CSV will never
841             # be imported into a spreadsheet except via us, and we pre-scan
842             # the data to generate {col_formats} so will usually be safe.
843             # 5/30/23: Switching to Option #2...
844             .",false"
845             # Token 8: on output: true to store number as numbers; false to
846             # store number cells as text. No UI equivalent.
847             .",true" # default: documented as true (for export) BUT IS NOT!
848             # Token 9: "Save cell contents as shown"
849             # Generally we DO NOT want this because things like dates
850             # can be formatted many different ways.
851             ##.",".($opts->{raw_values} ? "false" : "true")
852             .",false"
853             # Token 10: "Export cell formulas"
854             .",false"
855             # Token 11: not used for export
856             .","
857             # Token 12: (LO 7.2+) sheet selections:
858             # 0 or absent => the "first" sheet
859             # 1-N => the Nth sheet (arrgh, can not specify name!!)
860             # -1 => export all sheets to files named filebasenamne.Sheetname.csv
861             .",".($opts->{allsheets} ? -1 :
862 0 0 0     0 $opts->{sheetname} ? die("add named-sheet support here") :
    0 0        
863             0)
864             # Token 13: Not used for export
865             .","
866             # Token 14: true to include BOM in the result
867             #.","
868             }
869             else {
870             undef
871 0         0 }
872             };
873              
874             # We can only control the output directory path, not the name of
875             # an individual result file. If $dst is a directory then the result
876             # could theoretically output into it directly, but instead we always
877             # output to an ephemeral temp directory and then move the results to $dst
878             #
879             # With 'allsheets' the resulting files must be renamed to conform to our
880             # external API (namely SHEETNAME.csv).
881             #
882             # ERROR DETECTION: As of LO 7.5 we always get zero exist status and the
883             # only way to detect errors is to notice that no files were written.
884             # https://bugs.documentfoundation.org/show_bug.cgi?id=155415
885             #
886 0         0 my $tdir = Path::Tiny->tempdir(path($dst)->basename."_XXXXX");
887             # will be deleted when $dirpath goes out of scope
888              
889             my @cmd = ($prog,
890             "--headless", "--invisible",
891             "--nolockcheck", "--norestore",
892             "--view", # open read-only in case can't create lockfile
893             $ifilter ? ("--infilter=$ifilter") : (),
894             "--convert-to",
895 0 0       0 $opts->{cvt_to}.($ofilter ? ":$ofilter" : ""),
    0          
896             "--outdir", $tdir->canonpath,
897             path($src)->canonpath);
898              
899 0 0       0 unless ($debug) {
900 0         0 $opts->{suppress_stdout} = 1;
901             #$opts->{suppress_stderr} = 1;
902             }
903              
904 0         0 my $cmdstatus = _runcmd($opts, @cmd);
905              
906 0 0       0 if ($cmdstatus != 0) {
907             # This should never happen, see ERROR DETECTION above
908             confess sprintf("($$) UNEXPECTED FAILURE, wstat=0x%04x\n",$cmdstatus),
909             " converting '$opts->{inpath}' to $opts->{cvt_to}\n",
910 0         0 " Command was: ",join(" ",map{qsh} @cmd);
  0         0  
911             }
912              
913 0         0 my @result_files = path($tdir)->children;
914 0 0       0 btw dvis '>> @result_files' if $debug;
915 0 0       0 if (@result_files == 0) {
916 0 0       0 croak qsh($src)." is missing or unreadable\n", "cmd: @cmd\n"
917             unless -r $src;
918 0         0 croak "Something went wrong, ",path($prog)->basename," produced no output\n",
919             "cmd: @cmd\n"
920             }
921              
922 0 0       0 if ($opts->{allsheets}) {
923             # Rename files to match our API (omit the spreadsheetbasename- prefix)
924 0         0 foreach (@result_files) {
925 0         0 my $dir = $_->parent; # Like dirname but including Volume:
926 0         0 my $base = $_->basename;
927 0 0       0 (my $newbase = $base) =~ s/^\Q$opts->{ifbase}\E-// or oops dvis '$base $opts';
928 0         0 my $newpath = $dir->child($newbase)->canonpath;
929 0         0 my $oldpath = path($_)->canonpath;
930 0 0       0 btw ">> Renaming $oldpath -> $newbase\n" if $debug;
931 0 0       0 rename ($oldpath, $newpath) or oops "$!";
932 0         0 $_ = $newpath; # update @result_files
933             }
934             }
935              
936             # Move the results to $dst
937 0 0       0 if (-e $dst) {
938 0 0       0 croak "$dst must be a directory if it pre-exists\n" unless -d $dst;
939 0 0       0 btw ">> Moving results -> $dst\n" if $debug;
940 0         0 foreach (@result_files) {
941 0 0       0 btw ">>> move $_ -> $dst" if $debug;
942 0         0 File::Copy::move($_, $dst)
943             }
944 0 0       0 btw ">> Now $dst contains: ",avis($dst->children) if $debug;
945             } else {
946 0 0       0 if ($opts->{allsheets}) {
947 0 0       0 btw ">> dirmove $tdir -> $dst\n" if $debug;
948 0 0       0 rename($tdir, $dst) or File::Copy::dirmove($tdir, $dst);
949             } else {
950 0 0       0 croak "Expecting only one result file, not @result_files"
951             if @result_files > 1;
952 0 0       0 btw ">> move $result_files[0] -> $dst\n" if $debug;
953 0         0 File::Copy::move($result_files[0], $dst);
954             }
955             }
956             }#_convert_using_openlibre
957              
958             sub _convert_using_ssconvert($$$) {
959 0     0   0 my ($opts, $src, $dst) = @_;
960 0         0 confess "Deprecated with extreme prejudice"; # no longer supported
961             ##
962             ## foreach (qw/inpath cvt_to /)
963             ## { oops "missing opts->{$_}" unless exists $opts->{$_} }
964             ##
965             ## my $eff_outpath = $opts->{outpath};
966             ## if (my $prog=which("ssconvert")) {
967             ## my $enc = _get_encodings_from_opts($opts);
968             ## $enc //= "UTF-8"; # default
969             ## my @options;
970             ## if ($opts->{cvt_to} eq "csv") {
971             ## push @options, '--export-type=Gnumeric_stf:stf_assistant';
972             ## my @dashO_terms = ("format=preserve", "transliterate-mode=escape");
973             ## push @dashO_terms, "charset='${enc}'" if defined($enc);
974             ## if ($opts->{sheetname}) {
975             ## push @dashO_terms, "sheet='$opts->{sheetname}'";
976             ## }
977             ## if ($opts->{allsheets}) {
978             ## #If both {allsheets} and {sheetname} are specified, only a single
979             ## # .csv file will be in the output directory
980             ## croak "'allsheets' option: 'outpath' must specify an existing directory"
981             ## unless -d $eff_outpath;
982             ## $eff_outpath = catfile($eff_outpath, "%s.csv");
983             ## push @options, "--export-file-per-sheet";
984             ## }
985             ## elsif ($opts->{sheetname}) {
986             ## # handled above
987             ## }
988             ## else {
989             ## # A backwards-incompatible change to ssconvert stopped extracting
990             ## # the "current" sheet by default; now all sheets are concatenated!
991             ## # See https://gitlab.gnome.org/GNOME/gnumeric/issues/461
992             ## # ssconvert verison 1.12.45 supports a new "-O active-sheet=y" option
993             ## ## PORTABILITY BUG: Redirection syntax will not work on windows
994             ## my ($ssver) = (qx/ssconvert --version 2>&1/ =~ /ssconvert version '?(\d[\d\.]*)/);
995             ## if (version::is_lax($ssver) && version->parse($ssver) >= v1.12.45) {
996             ## push @dashO_terms, "active-sheet=y";
997             ## } else {
998             ## croak("Due to an ssconvert bug, a sheetname must be given.\n",
999             ## "(for more information, see comment at ",__FILE__,
1000             ## " near line ", (__LINE__-10), ")\n");
1001             ## }
1002             ## }
1003             ## push @options, '-O', join(" ",@dashO_terms);
1004             ## }
1005             ## elsif ($opts->{cvt_to} eq 'xlsx') {
1006             ## @options = ('--export-type=Gnumeric_Excel:xlsx2');
1007             ## }
1008             ## elsif ($opts->{cvt_to} eq 'xls') {
1009             ## @options = ('--export-type=Gnumeric_Excel:excel_biff8'); # M'soft Excel 97/2000/XP
1010             ## }
1011             ## elsif ($opts->{cvt_to} =~ /^od/) {
1012             ## @options = ('--export-type=Gnumeric_OpenCalc:odf');
1013             ## }
1014             ## elsif ($eff_outpath =~ /\.[a-z]{3,4}$/) {
1015             ## # let ssconvert choose based on the output file suffix
1016             ## }
1017             ## else {
1018             ## croak "unrecognized cvt_to='".u($opts->{cvt_to})."' and no outpath suffix";
1019             ## }
1020             ##
1021             ## my $eff_inpath = $opts->{inpath};
1022             ## if ($opts->{sheetname} && $opts->{inpath} =~ /.csv$/i) {
1023             ## # Control generated sheet name by using a symlink to the input file
1024             ## # See http://stackoverflow.com/questions/22550050/how-to-convert-csv-to-xls-with-ssconvert
1025             ## my $td = catdir($tempdir // oops, "Gnumeric");
1026             ## remove_tree($td); mkdir($td) or die $!;
1027             ## $eff_inpath = catfile($td, $opts->{sheetname});
1028             ## symlink $opts->{inpath}, $eff_inpath or die $!;
1029             ## fixme: handle unimplmented or no-perms symlink failures
1030             ## }
1031             ## my @cmd = ($prog, @options, $eff_inpath, $eff_outpath);
1032             ##
1033             ## my $suppress_stderr = !$opts->{debug};
1034             ## if (0 != _runcmd({%$opts, suppress_stderr => $suppress_stderr}, @cmd)) {
1035             ## # Before showing a complicated ssconvert failure with backtrace,
1036             ## # check to see if the problem is just a non-existent input file
1037             ## { open my $dummy_fh, "<", $eff_inpath or croak "$eff_inpath : $!"; }
1038             ## my $failmsg = "($$) Conversion of '$opts->{inpath}' to $eff_outpath failed\n"."cmd: ".qshlist(@cmd)."\n";
1039             ## if ($suppress_stderr) { # repeat showing all output
1040             ## if (0 == _runcmd({%$opts, suppress_stderr => 0}, @cmd)) {
1041             ## warn "Surprise! Command failed the first time but succeeded on 2nd try!\n";
1042             ## }
1043             ## croak $failmsg;
1044             ## }
1045             ## }
1046             ## elsif (! -e $opts->{outpath}) {
1047             ## croak "($$) Conversion SILENTLY failed\n(using $prog)\n",
1048             ## " cmd: ",qshlist(@cmd),"\n"
1049             ## ;
1050             ## }
1051             ## return ($enc)
1052             ## }
1053             ## else {
1054             ## croak "Can not find ssconvert to convert '$opts->{inpath}' to $opts->{cvt_to}\n",
1055             ## "To install ssconvert: sudo apt-get install gnumeric\n";
1056             ## }
1057             }
1058              
1059             # Extracts |||SHEETNAME or !SHEETNAME or [SHEETNAME] from a path+sheet
1060             # specification, if present.
1061             # (Lots of historical compatibility issues...)
1062             # In scalar context, returns SHEETNAME or undef.
1063             # INTERNAL USE ONLY: In array context, returns (filepath, SHEETNAME or undef)
1064             sub sheetname_from_spec($) {
1065 6     6 1 26 my $spec = shift;
1066 6         11 local $_;
1067 6         18 my $p = path($spec);
1068 6         350 my $parent = $p->parent;
1069 6         407 my ($base,$sn) = ($p->basename =~ /^(.*) (?| \|\|\|([^\!\[\|]+)$
1070             | \!([^\!\[\|]+)$
1071             | \[([^\[\]]+)\]$
1072             )/x);
1073 6 100 33     114 wantarray ? ($parent->child($base//$p->basename)->stringify, $sn) : $sn
1074             }
1075             sub filepath_from_spec($) {
1076 2     2 1 6 my ($path, undef) = sheetname_from_spec($_[0]);
1077 2         103 $path
1078             }
1079             #Tester
1080             #foreach ("", "/a!b/c", "/a!b/c!sheet1", "/a/b/c[sheet2]", "/a/b/c[bozo]d.xls",
1081             # ) {
1082             # foreach($_, basename($_)) {
1083             # my ($fp,$sn) = sheetname_from_spec($_);
1084             # use open ':std', ':locale';
1085             # warn ivis '# $_ → $fp $sn\n';
1086             # my $sn2 = sheetname_from_spec($_);
1087             # die "bug" unless u($sn) eq u($sn2);
1088             # }
1089             #}
1090             #die "TEX";
1091              
1092             # Construct a file + sheetname spec in the preferred form for humans to read
1093             # If sheetname is undef, just return the file path
1094             sub form_spec_with_sheetname($$) {
1095 2     2 1 10 my ($filespec, $sheetname) = @_;
1096 2         7 my $embedded_sheetname = sheetname_from_spec($filespec);
1097 2 0 33     8 croak "conflicting embedded and separate sheetnames given"
      33        
1098             if $embedded_sheetname && $sheetname && $embedded_sheetname ne $sheetname;
1099 2   33     14 $sheetname //= $embedded_sheetname;
1100 2         6 my $filepath = filepath_from_spec($filespec);
1101 2 50       19 $sheetname ? "${filepath}[${sheetname}]" : $filepath
1102             #$sheetname ? "${filepath}|||${sheetname}" : $filepath
1103             }
1104              
1105             our $default_input_encodings = "UTF-8,windows-1252,UTF-16BE,UTF-16LE";
1106             our $default_output_encoding = "UTF-8";
1107              
1108             # Return digested %opts setting
1109             # sheetname, inpath_sans_sheet (as Path::Tiny), encoding or default
1110             sub _process_args($;@) {
1111 2 50   2   5 confess "fix obsolete call to pass linearized options"
1112             if ref($_[0]) eq "HASH";
1113 2 50       8 my $leading_inpath = ( scalar(@_) % 2 == 1 ? shift(@_) : undef );
1114 2         12 my %opts = (
1115             cvt_from => "",
1116             cvt_to => "",
1117             @_,
1118             #verbose => 999, tempdir => "/tmp/J",
1119             );
1120 2 50       12 if (defined $opts{inpath}) {
1121 0 0       0 croak "Initial INPATH arg specified as well as inpath => ... in options"
1122             if defined $leading_inpath;
1123             } else {
1124 2   33     13 $opts{inpath} = $leading_inpath // croak "No inpath was specified";
1125             }
1126 2 50       7 $opts{verbose}=1 if $opts{debug};
1127              
1128             # inpath or outpath may have "!sheetname" appended (or alternate syntaxes),
1129             # but may exist only if a separate 'sheetname' option is not specified.
1130             # Input and output can not both be spreadsheets; one must be a CSV.
1131 2 50       8 if (exists($opts{sheet})) {
1132 0         0 carp "WARNING: Deprecated 'sheet' option found (use 'sheetname' instead)\n";
1133 0 0       0 croak "Both {sheet} and {sheetname} specified" if exists $opts{sheetname};
1134 0         0 $opts{sheetname} = delete $opts{sheet};
1135             }
1136 2         3 { my ($path_sans_sheet, $sheetname, $key);
  2         5  
1137 2         7 for my $thiskey ('inpath', 'outpath') {
1138 4   100     17 my $spec = $opts{$thiskey} || next;
1139 2         8 my ($pssn, $sn) = sheetname_from_spec($spec);
1140 2 50       133 if (defined $sn) {
1141 0 0       0 croak "A sheetname is embeeded in both ",
1142             "'$thiskey' ($opts{$thiskey}) and '$key' ($opts{$key})\n"
1143             if $sheetname;
1144 0         0 ($path_sans_sheet, $sheetname, $key) = ($pssn, $sn, $thiskey);
1145             }
1146             }
1147 2 50       10 if ($opts{sheetname}) {
    50          
1148             croak "'sheetname' option conflicts with embedded sheet name\n",
1149             " sheetname => ", qsh($opts{sheetname}),"\n",
1150             " $key => ", qsh($opts{$key}),"\n"
1151 0 0 0     0 if defined($sheetname) && $sheetname ne $opts{sheetname};
1152             }
1153             elsif (defined $sheetname) {
1154             btw "(extracted sheet name \"$sheetname\" from $key)\n"
1155 0 0       0 if $opts{verbose};
1156 0         0 $opts{sheetname} = $sheetname;
1157             }
1158             $opts{inpath_sans_sheet} = path(
1159             ($key && $key eq 'inpath') ? $path_sans_sheet : $opts{inpath}
1160 2 50 33     11 );
1161             }
1162             # Input file basename sans any .suffix
1163 2         110 $opts{ifbase} = $opts{inpath_sans_sheet}->basename(qr/\.[^.]+/);
1164              
1165 2         135 %opts
1166             }#_process_args
1167              
1168             # Extract the of encoding(s) specified in an iolayers string
1169             # Parse iolayers string, returning ($prefix,[encodings],$suffix)
1170             # For example from ":raw:encodings(utf8,windows-1252):zz" the output
1171             # would be (":raw", [:utf8","windows-1252"], ":zz")
1172             sub _parse_iolayers($) {
1173 0   0 0   0 local $_ = (shift) // "";
1174 0 0       0 /\A(<prefix>.*?)
1175             (<encspec>:utf8|:encoding\(([^\)]+)\))
1176             (<suffix>.*?)\z/ or croak "Invalid iolayers spec '$_'\n";
1177 0         0 (my $prefix, $_, my $suffix) = ($+{prefix}, $+{encspec}, $+{suffix});
1178 0 0 0     0 /^:(utf8)$/ || /^:encoding\(([^\)]+)\)$/ or oops($_);
1179 0         0 my $enclist = [split /,/, $1]; # comma,separated,list,of,encodings
1180 0         0 ($prefix, $enclist, $suffix);
1181             }
1182              
1183             # Detect cvt_from and cvt_to from filenames, or peeking at the data.
1184             # If input is CSV, detect encoding, separator and quote characters;
1185             # add quotes to values with leading zeroes (e.g. Zip codes) which would
1186             # otherwise be corrupted by being read as numbers instead of text strings.
1187             # The modified data is written to a temp file
1188             # Set default output_encoding if not specified
1189             # RETURNS: The effective input path, either inpath_sans_sheet or a tempfile
1190             sub _determine_enc_tofrom($) {
1191             my $opts = shift;
1192             my $debug = $opts->{debug};
1193             # Skip to ==BODY== below
1194              
1195             my sub determine_input_encoding($) {
1196             my $r2octets = shift;
1197             # If user specified one encoding, use it; if user specified list, try them.
1198             # If user did not specify, the default is a list to try.
1199             $opts->{input_encoding} //= $default_input_encodings;
1200             my @enclist = split m#,#, $opts->{input_encoding};
1201             return
1202             if @enclist == 1;
1203             $$r2octets //= $opts->{inpath_sans_sheet}->slurp_raw;
1204             for my $enc (@enclist) {
1205             eval { decode($enc, $$r2octets, Encode::FB_CROAK|Encode::LEAVE_SRC) };
1206             if ($@) {
1207             btw "Input encoding '$enc' did not work...($@)\n" if $debug;
1208             next;
1209             }
1210             btw "Input encoding '$enc' seems to work.\n" if $debug;
1211             @enclist = ($enc);
1212             last
1213             }
1214             #croak "Could not detect encoding of $opts->{inpath_sans_sheet}\n"
1215             confess "Could not detect encoding of $opts->{inpath_sans_sheet}\n"
1216             if @enclist > 1;
1217             $opts->{input_encoding} = $enclist[0];
1218             } #determine_input_encoding
1219              
1220             my sub readparse_csv($@) {
1221             my $fh = shift;
1222             my %csvopts = (
1223             @sane_CSV_read_options,
1224             defined($opts->{quote_char}) ? (quote_char=>$opts->{quote_char}) : (),
1225             defined($opts->{sep_char}) ? (sep_char=>$opts->{sep_char}) : (),
1226             auto_diag => 2, # throw on error
1227             @_
1228             );
1229             $csvopts{escape_char} = $csvopts{quote_char}; # must always be the same
1230              
1231             my $csv = Text::CSV->new (\%csvopts)
1232             or croak "Text::CSV->new: ", Text::CSV->error_diag(),
1233             dvis('\n## %csvopts\n');
1234             my @rows;
1235             while (my $F = $csv->getline( $fh )) {
1236             push(@rows, $F);
1237             }
1238             \@rows
1239             }
1240              
1241             my sub open_input($) {
1242             my $r2octets = shift;
1243             oops unless $opts->{input_encoding};
1244             my $fh;
1245             my $pathish = defined($$r2octets)
1246             ? \$$r2octets : $opts->{inpath_sans_sheet};
1247 1     1   7 open($fh, "<:encoding($opts->{input_encoding})", $pathish)
  1         2  
  1         11  
1248             or die "$pathish : $!";
1249             $fh
1250             }
1251              
1252             my sub determine_csv_q_sep($$) {
1253             my ($r2octets, $r2rows) = @_;
1254             return
1255             if defined($opts->{quote_char}) && defined($opts->{sep_char});
1256              
1257             my $fh = open_input($r2octets);
1258              
1259             # my $chars;
1260             # if (defined($$r2octets)
1261             # $chars = decode($opts->{input_encoding},$$r2octets,Encode::FB_CROAK);
1262             # }
1263              
1264             # Try combinations starting with the most-common '"' and ',' while
1265             # parsing the file for unsafe unquoted values (throws on syntax error).
1266             # The expectation is that the first try usually succeeds
1267             Q:
1268             for my $q (defined($opts->{quote_char})
1269             ? ($opts->{quote_char}) : ("\"", "'")) {
1270             my $found_q;
1271             SEP:
1272             for my $sep (defined($opts->{sep_char})
1273             ? ($opts->{sep_char}) : (",","\t")) {
1274             btw dvis '--- TRYING $q $sep ---' if $debug;
1275             # # Preliminary check for an illegal use of the quote char
1276             # if (defined($chars)
1277             # && $chars =~ /[^${q}${sep}\x{0D}\x{0A}]
1278             # ${q}
1279             # (?=[^${q}${sep}\x{0D}\x{0A}] | \z)/gx) {
1280             # btw ivis '>>>quote_char CAN NOT BE $q with sep=$sep because q exists mid-field before pos ${\(pos($chars))}'
1281             # if $debug;
1282             # next SEP
1283             # }
1284             $$r2rows = eval{ readparse_csv($fh, quote_char=>$q, sep_char=>$sep) };
1285             if ($@ eq "") {
1286             warn ivis '>> Detected quote_char=$q sep_char=$sep\n' if $debug;
1287             $opts->{quote_char} = $q;
1288             $opts->{sep_char} = $sep;
1289             last Q;
1290             }
1291             warn vis '$@\nq=$q sep=$sep did not work...\n' if $debug;
1292             seek $fh, 0, SEEK_SET;
1293             }
1294             }
1295             unless (defined($$r2rows)) {
1296             confess "Input file is not valid CSV (or we have a bug)\n"
1297             }
1298             }#determine_csv_q_sep
1299              
1300             my sub determine_csv_col_formats($$) {
1301             my ($r2octets, $r2rows) = @_;
1302             return
1303             if defined $opts->{col_formats};
1304             $$r2rows //= do{
1305             my $fh = open_input($r2octets);
1306             readparse_csv($fh);
1307             };
1308             my $max_cols = 0; for my $row (@{ $$r2rows }) { $max_cols = @$row if $max_cols < @$row }
1309             state $curr_yy = (localtime(time))[5];
1310             my @col_formats;
1311             my sub recognized($$$$;$) {
1312             my ($cx, $rx, $thing, $format, $as_msg) = @_;
1313             $col_formats[$cx] = $format;
1314             return unless $debug;
1315             $as_msg //= " as ".vis($col_formats[$cx])." format";
1316             if (length($thing) > 35) { $thing = substr($thing,0,32)."..."; }
1317             @_ = ("Recognized ",$thing," in ", cxrx2sheetaddr($cx,$rx), $as_msg);
1318             goto &btw
1319             }
1320             CX:
1321             for my $cx (0..$max_cols-1) {
1322             RX:
1323             for my $rx (0..$#{$$r2rows}) {
1324             my $row = $$r2rows->[$rx];
1325             next if $cx > $#$row; # row has fewer columns than others
1326             for ($row->[$cx]) {
1327             # recognize obvious Y/M/D or M/D/Y or D/M/Y date forms
1328             if (m#\b(?<y>(?:[12]\d)?\d\d)/(?<m>\d\d)/(?<d>\d\d)\b#) {
1329             if ($+{d} > 12 && $+{d} <= 31 && $+{m} >= 1 && $+{m} <= 12
1330             && ($+{y} < 100 || $+{y} >= 1000)) {
1331             recognized($cx,$rx,$_,"YY/MM/DD");
1332             next CX;
1333             }
1334             # If ambiguous YYYY/??/?? we can still assume it is a date and not text
1335             if (length($+{y})==4) {
1336             #recognized($cx,$rx,$_,""," as some kind of date, fmt unknown");
1337             next RX;
1338             }
1339             }
1340             if (m#\b(?<m>\d\d)/(?<d>\d\d)/(?<y>(?:[12]\d)?\d\d)\b#) {
1341             if ($+{y} < 100 || $+{y} >= 1000) {
1342             if ($+{d} > 12 && $+{d} <= 31 && $+{m} >= 1 && $+{m} <= 12) {
1343             recognized($cx,$rx,$_,"MM/DD/YY");
1344             next CX
1345             }
1346             elsif ($+{m} > 12 && $+{m} <= 31 && $+{d} >= 1 && $+{d} <= 12) {
1347             recognized($cx,$rx,$_,"DD/MM/YY");
1348             next CX
1349             }
1350             }
1351             # If ambiguous ??/??/YYYY we can still assume it is a date and not text
1352             if (length($+{y})==4) {
1353             #recognized($cx,$rx,$_,""," as some kind of date, fmt unknown");
1354             next RX;
1355             }
1356             }
1357             # Things to force to be read as text fields:
1358             # 1. Leading zeroes
1359             # 2. Leading ascii minus (\x{2D}) rather than math minus \N{U+2212}.
1360             # This prevents conversion to the Unicode math minus when LO
1361             # reads the CSV. The assumption is that if the input has an ascii
1362             # minus then the original spreadsheet format was "text" not
1363             # numeric.
1364             if (/^[\x{2D}0]/) {
1365             recognized($cx,$rx,$_,"text");
1366             next CX;
1367             }
1368             }
1369             }
1370             }
1371             $opts->{col_formats} = \@col_formats;
1372             }#determine_csv_col_formats
1373              
1374             # ==BODY==
1375             unless ($opts->{cvt_to}) {
1376             if ($opts->{outpath} && $opts->{outpath} =~ /\.([^.]+)$/) {
1377             $opts->{cvt_to} = $1;
1378             }
1379             croak "'cvt_to' was not specified and can not be intuited from 'outpath'"
1380             ,dvis('\n### $opts') ###TEMP
1381             unless $opts->{cvt_to};
1382             }
1383             unless ($opts->{cvt_from}) {
1384             if ($opts->{inpath_sans_sheet} =~ /\.([^.]+)$/) {
1385             $opts->{cvt_from} = $1;
1386             }
1387             }
1388             $opts->{cvt_from} =~ s/^\.txt$/.csv/i if $opts->{cvt_from};
1389              
1390             # Detect file format and, if CSV, encoding
1391             my ($octets, $rows);
1392             if (!$opts->{cvt_from} || $opts->{cvt_from} eq "csv") {
1393             determine_input_encoding(\$octets);
1394             }
1395             if (!$opts->{cvt_from}) {
1396             # Detect the file format by looking at the data. Actually, we only
1397             # support CSV in this case, so this is just a (half-baked) sanity check.
1398             eval {
1399             determine_csv_q_sep(\$octets, \$rows);
1400             if (!$opts->{cvt_from}) {
1401             $rows //= do{
1402             my $fh = open_input(\$octets);
1403             readparse_csv($fh);
1404             };
1405             }
1406             };
1407             if ($@ eq "") {
1408             warn "> Detected $opts->{inpath_sans_sheet} as a seemingly-valid CSV\n"
1409             if $debug;
1410             $opts->{cvt_from} = "csv";
1411             } else {
1412             croak "Can not detect what kind of file ",qsh($opts->{inpath})," is\n";
1413             }
1414             }
1415              
1416             if ($opts->{cvt_from} eq "csv") {
1417             determine_csv_col_formats(\$octets, \$rows);
1418             } else {
1419             oops if defined($octets) or defined($rows);
1420             }
1421              
1422             # Set default ouput_encoding if not specified
1423             $opts->{output_encoding} //= $default_output_encoding
1424             if $opts->{cvt_to} eq "csv";
1425              
1426             }#_determine_enc_tofrom
1427              
1428             sub _tool_extract_all_csvs($$) {
1429 0     0   0 my ($opts, $destdir) = @_;
1430              
1431 0         0 _get_exclusive_lock($opts);
1432 0     0   0 scope_guard { _release_lock($opts); };
  0         0  
1433              
1434 0         0 delete local $opts->{sheetname};
1435 0         0 local $opts->{allsheets} = 1;
1436 0 0       0 if (_openlibre_supports_allsheets()) {
    0          
1437 0         0 _convert_using_openlibre($opts, $opts->{inpath_sans_sheet}, $destdir);
1438             }
1439             elsif (_ssconvert_supports_allsheets()) {
1440 0         0 _convert_using_ssconvert($opts, $opts->{inpath_sans_sheet}, $destdir);
1441             }
1442 0         0 else { confess "Can't extract 'allsheets'. Please install LibreOffice 7.2 or newer" }
1443             }
1444              
1445             sub _tool_can_extract_csv_byname() {
1446 0 0   0   0 _openlibre_supports_named_sheet() || _ssconvert_supports_named_sheet()
1447             }
1448             sub _tool_extract_one_csv($$) {
1449 0     0   0 my ($opts, $destpath) = @_;
1450              
1451 0         0 _get_exclusive_lock($opts);
1452 0     0   0 scope_guard { _release_lock($opts); };
  0         0  
1453              
1454 0 0       0 if (_openlibre_features()->{available}) {
1455 0 0 0     0 oops if $opts->{sheetname} && !_openlibre_supports_named_sheet();
1456 0         0 _convert_using_openlibre($opts, $opts->{inpath_sans_sheet}, $destpath);
1457             } else {
1458 0         0 _convert_using_ssconvert($opts, $opts->{inpath_sans_sheet}, $destpath);
1459             }
1460             }
1461             sub _tool_can_extract_current_sheet() {
1462             _openlibre_features()->{available} || _ssconvert_features()->{available}
1463 0 0   0   0 }
1464              
1465             sub _tool_write_spreadsheet($$) {
1466 0     0   0 my ($opts, $destpath) = @_;
1467              
1468 0         0 _get_exclusive_lock($opts);
1469 0     0   0 scope_guard { _release_lock($opts); };
  0         0  
1470              
1471             # ssconvert allows specifying the sheetname when importing a csv but not LO
1472 0 0 0     0 if ($opts->{sheetname} && _ssconvert_supports_writing($opts->{cvt_to})) {
    0          
    0          
1473 0         0 _convert_using_ssconvert($opts, $opts->{inpath_sans_sheet}, $destpath);
1474             }
1475             elsif (_openlibre_supports_writing($opts->{cvt_to})) {
1476 0 0       0 if ($opts->{sheetname}) {
1477 0         0 carp "WARNING: Sheet name when creating a spreadsheet will be ignored\n";
1478 0         0 delete $opts->{sheetname};
1479             }
1480 0         0 _convert_using_openlibre($opts, $opts->{inpath_sans_sheet}, $destpath);
1481             }
1482             elsif (_ssconvert_supports_writing($opts->{cvt_to})) {
1483 0         0 _convert_using_ssconvert($opts, $opts->{inpath_sans_sheet}, $destpath);
1484             }
1485 0         0 else { croak "Can't create $opts->{cvt_to} spreadsheets. Please install LibreOffice 7.2 or newer" }
1486             }
1487              
1488              
1489             # Extract CSVs for every sheet into {outpath} (setting to tmpdir if not preset).
1490             # If cached CSVs are available they are moved into {outpath}/ .
1491             sub _extract_all_csvs($) {
1492 0     0   0 my ($opts) = @_;
1493 0         0 my $outpath = _final_outpath($opts);
1494 0         0 $outpath->mkpath; # nop if exists, croaks if conflicts with file
1495              
1496 0         0 _tool_extract_all_csvs($opts, $outpath); #logs
1497             }
1498              
1499              
1500             # Extract a single sheet into a CSV at {outpath} (defaulting to temp file).
1501             # If a cached CSV is available it is moved to {outpath}.
1502             sub _extract_one_csv($) {
1503             my ($opts) = @_;
1504             my $cachedirpath = _cachedir($opts);
1505              
1506             my sub _fill_csv_cache() {
1507             $cachedirpath->remove_tree;
1508             $cachedirpath->mkpath;
1509             { #local $opts->{verbose} = 0;
1510             #local $opts->{debug} = 0;
1511             _tool_extract_all_csvs($opts, $cachedirpath);
1512             }
1513             }
1514              
1515             my $outpath = _final_outpath($opts);
1516             $outpath->remove unless -d $outpath;
1517              
1518             if (defined($opts->{sheetname})) {
1519             my $fname = $opts->{sheetname}.".csv";
1520             my $cached_path = $cachedirpath->child($fname);
1521             if (! -e $cached_path) {
1522             if (_tool_can_extract_csv_byname()) {
1523             _tool_extract_one_csv($opts, $outpath); #logs
1524             return
1525             }
1526             warn ">>Emulating extract-by-name by extracting all csvs into cache...\n"
1527             if $opts->{debug};
1528             _fill_csv_cache;
1529             }
1530             croak "Sheet '$opts->{sheetname}' does not exist in $opts->{inpath_sans_sheet}\n"
1531             unless -e $cached_path;
1532             warn "> Moving cached $fname to $outpath\n" if $opts->{verbose};
1533             File::Copy::move($cached_path, $outpath);
1534             return
1535             }
1536             elsif (_tool_can_extract_current_sheet()) {
1537             _tool_extract_one_csv($opts, $outpath); #logs
1538             return
1539             }
1540             else {
1541             _fill_csv_cache;
1542             my @children = $cachedirpath->children;
1543             if (@children == 0) {
1544             croak "$opts->{inpath_sans_sheet} appears to have zero sheets!\n"
1545             }
1546             elsif (@children == 1) {
1547             my $fname = $children[0]->basename;
1548             my $cached_path = $cachedirpath->child($fname);
1549             warn "> Moving cached $fname to $outpath\n" if $opts->{verbose};
1550             File::Copy::move($cached_path, $outpath);
1551             return
1552             }
1553             else {
1554             croak "$opts->{inpath_sans_sheet} contains multiple sheets; you must specify a sheetname\n"
1555             }
1556             }
1557             }
1558             sub _write_spreadsheet($) {
1559 0     0   0 my ($opts) = @_;
1560              
1561 0         0 my $outpath = _final_outpath($opts);
1562 0 0       0 $outpath->remove unless -d $outpath;
1563              
1564 0         0 _tool_write_spreadsheet($opts, $outpath);
1565             }
1566              
1567             # If {outpath} is not set, set it to a unique output path in $tempdir
1568             # Always returns {outpath} as a Path::Tiny object.
1569             sub _final_outpath($) {
1570 0     0   0 my $opts = shift;
1571 0 0       0 if (defined $opts->{outpath}) {
1572 0         0 return path($opts->{outpath});
1573             } else {
1574 0 0       0 my $suf = $opts->{cvt_to} unless $opts->{allsheets};
1575             return(
1576 0         0 ($opts->{outpath}=_path_under_tempdir($opts, suf=>$suf))
1577             );
1578             }
1579             }
1580              
1581             sub convert_spreadsheet(@) {
1582             # Set inpath_sans_sheet, sheetname, ifbase, etc.
1583 2     2 1 8 my %opts = &_process_args;
1584 2 50       14 btw dvis('>>> convert_spreadsheet %opts\n') if $opts{debug};
1585 2         11 my %input_opts = %opts;
1586              
1587 2         13 _create_tempdir_if_needed(\%opts);
1588              
1589             # intuit cvt_from & cvt_to, detect encoding, and pre-process .csv input
1590             # if needed to avoid corruption of leading zeroes.
1591 2         63 _determine_enc_tofrom(\%opts);
1592              
1593 2         15 my $input_enc = $opts{input_encoding};
1594 2         7 my $output_enc = $opts{output_encoding};
1595              
1596             croak "Either input or output must be 'csv'\n"
1597 2 50 33     12 unless $opts{cvt_from} eq 'csv' || $opts{cvt_to} eq 'csv';
1598 2 50       7 if ($opts{allsheets}) {
1599             croak "'allsheets' is allowed only with cvt_to => 'csv'"
1600 0 0 0     0 unless ($opts{cvt_to}//"") eq "csv";
1601             croak "With 'allsheets', a sheet name may not be specified\n"
1602 0 0       0 if $opts{sheetname};
1603             croak "With 'allsheets', 'outpath' must be a directory if it exists\n"
1604 0 0 0     0 if $opts{outpath} && -e $opts{outpath} && ! -d _;
      0        
1605             }
1606              
1607 2         4 my $done;
1608 2 50       6 if ($opts{cvt_from} eq $opts{cvt_to}) { # csv to csv
1609 2 50       5 if (!$opts{allsheets}) {
1610 2 50       7 if ($input_enc ne $output_enc) {
1611             # Special case #1: in & out are CSVs but different encodings.
1612             warn "> Transcoding csv: $input_enc -> $output_enc\n"
1613 0 0       0 if $opts{debug};
1614 0         0 my $octets = $opts{inpath_sans_sheet}->slurp_raw;
1615 0         0 my $chars = decode($input_enc, $octets, Encode::FB_CROAK);
1616 0         0 $octets = encode($output_enc, $chars, Encode::FB_CROAK);
1617 0         0 path(_final_outpath(\%opts))->spew_raw($octets);
1618 0         0 $done = 1;
1619             } else {
1620             # Special case #2: No conversion is needed: Just copy the file or
1621             # return the input path itself as the output
1622 2 50       7 if (defined $opts{outpath}) {
1623             warn "> No conversion needed, copying into ",qsh($opts{outpath}),"\n"
1624 0 0       0 if $opts{verbose};
1625 0         0 $opts{inpath_sans_sheet}->copy($opts{outpath});
1626 0         0 $done = 1;
1627             } else {
1628 2         5 $opts{outpath} = $opts{inpath_sans_sheet};
1629             warn "> No conversion needed, returning ", qsh($opts{outpath}),"\n"
1630 2 50       5 if $opts{verbose};
1631 2         5 $done = 1;
1632             }
1633             }
1634             }
1635             else {
1636             # Special case #2: <allsheets> with input already a csv:
1637             # Leave a symlink to the input in the <outpath> directory.
1638 0 0       0 croak "transcoding not implemented in this situation"
1639             if ($input_enc ne $output_enc);
1640 0         0 my $outpath = path(_final_outpath(\%opts));
1641 0         0 $outpath->mkpath; # nop if exists, croaks if conflicts with file
1642 0         0 my $dest = $outpath->child( $opts{ifbase}.".csv" );
1643 0         0 my $inpath = $opts{inpath_sans_sheet};
1644 0         0 my $s = eval{ symlink($inpath, $dest) };
  0         0  
1645 0 0 0     0 if ($@ or !$s) { # symlink unimplmented or insufficient permissions
1646 0 0       0 btw dvis '>> $@' if $opts{debug};
1647             warn "> No conversion needed! Copying into ", qsh($dest),"\n"
1648 0 0       0 if $opts{verbose};
1649 0         0 $opts{inpath_sans_sheet}->copy($dest);
1650             } else {
1651             warn "> No conversion needed! Left symlink at ", qsh($dest),"\n"
1652 0 0       0 if $opts{verbose};
1653             }
1654 0         0 $done = 1;
1655             }
1656             }
1657 2 50       7 if (! $done) {
1658 0 0       0 if ($opts{allsheets}) {
1659 0         0 _extract_all_csvs(\%opts);
1660             }
1661             else {
1662             # Result will be a single file.
1663 0 0       0 if ($opts{cvt_to} eq "csv") {
1664 0         0 _extract_one_csv(\%opts);
1665             } else {
1666 0         0 _write_spreadsheet(\%opts);
1667             }
1668             }
1669             }
1670             my $result = {
1671             defined($output_enc) ? (encoding => $output_enc):(),
1672 8         30 (map{ my $v = $opts{$_};
1673 8 100       35 ($_ => (blessed($v) ? $v->stringify : $v))
1674 2 50       7 } grep{ defined $opts{$_} }
  10         23  
1675             qw/inpath_sans_sheet outpath cvt_from cvt_to sheetname/)
1676             };
1677             log_call [\%input_opts], [$result, \_fmt_outpath_contents($result)]
1678 2 50       8 if $opts{verbose};
1679              
1680 2         9 $result;
1681             }#convert_spreadsheet
1682              
1683             # Open as a CSV, intuiting input encoding, converting spreadsheet if necessary.
1684             #
1685             # :crlf translation is enabled on the resulting file handle, which converts
1686             # DOS CR,LF to \n while passing *nix bare LF through unmolested.
1687             #
1688             # Input argument(s) are the same as for convert_spreadsheet (except
1689             # outpath may not be specified).
1690             #
1691             # Returns a hash containing the file handle and other information.
1692             sub OpenAsCsv {
1693 2 50   2 1 31 my %opts = (
1694             (@_ == 1 ? (inpath => $_[0]) : (@_)),
1695             cvt_to => 'csv',
1696             );
1697             # TODO: Rename {path} to {inpath} in all usages and rm this cruft;
1698             carp "Obsolete OpenAsCsv usage: Change path to inpath\n"
1699 2 50 33     12 if exists($opts{path}) and !$opts{silent};
1700 2   33     21 $opts{inpath} //= delete $opts{path}; # be compatible with old API
1701              
1702 2         8 my $inpath = delete $opts{inpath};
1703 2 50       19 croak "OpenAsCsv: missing 'inpath' option\n" unless $inpath;
1704 2 50       7 croak "OpenAsCsv: outpath may not be specified\n" if $opts{outpath};
1705              
1706 2         11 my $h = convert_spreadsheet($inpath, %opts, verbose => $opts{debug});
1707 2 50       9 oops "sheetname key bug" if exists $h->{sheet};
1708              
1709 2   33     8 my $csvpath = $h->{outpath} // oops; # same as {inpath} if already a CSV
1710 2 50       87 open my $fh, "<", $csvpath or croak "$csvpath : $!\n";
1711 2 50       45 binmode $fh, ":crlf:encoding(".$h->{encoding}.")" or die "binmode:$!";
1712              
1713             my $r = {
1714             fh => $fh,
1715             csvpath => $csvpath,
1716             inpath => $inpath,
1717 2 100       120 (map{ exists($h->{$_}) ? ($_ => $h->{$_}) : () }
  10         39  
1718             qw/inpath_sans_sheet sheetname encoding tempdir raw_values/),
1719             };
1720              
1721 2         15 return $r;
1722             }
1723              
1724             1;
1725             __END__
1726              
1727             =pod
1728              
1729             =head1 NAME
1730              
1731             Spreadsheet::Edit::IO - convert between spreadsheet and csv files
1732              
1733             =head1 SYNOPSIS
1734              
1735             use Spreadsheet::Edit::IO qw/
1736             convert_spreadsheet OpenAsCsv
1737             cx2let let2cx
1738             @sane_CSV_read_options @sane_CSV_write_options/;
1739              
1740             # Open a CSV file or result of converting a sheet from a spreadsheet
1741             my $hash = OpenAsCsv("/path/to/spreadsheet.odt!Sheet1"); # single-arg form
1742             my $hash = OpenAsCsv(inpath => "/path/to/spreadsheet.odt",
1743             sheetname -> "Sheet1");
1744              
1745             print "Reading ",$hash->{csvpath}," with encoding ",$hash->{encoding},"\n";
1746             while (<$hash->{fh}>) { ... }
1747              
1748             # Convert CSV to spreadsheet in temp file (deleted at process exit)
1749             $hash = convert_spreadsheet(inpath => "mycsv.csv", cvt_to => "xlsx");
1750             print "Output is $hash->{outpath}\n"; # e.g. "/tmp/dwYT6qf/mycsv.xlsx"
1751              
1752             # Convert *all* sheets to CSV files in a temp directory
1753             $hash = convert_spreadsheet(inpath => "mywork.xls", cvt_to => "csv",
1754             allsheets => 1);
1755             opendir $dh, $hash->{outpath};
1756             while (readrir($h)) { say "Output csv file is $_" }
1757              
1758             # Transcode a CSV from windows-1252 to UTF-8
1759             convert_spreadsheet(
1760             inpath => "input.csv", input_encoding => 'windows-1252',
1761             outpath => "output.csv", output_encodoutg => 'UTF-8',
1762             );
1763              
1764             # Translate between 0-based column index and letter code (A, B, etc.)
1765             print cx2let(0); # "A"
1766             print let2cx("A"); # 0
1767             print cx2let(26); # "AA"
1768             print let2cx("ABC"); # 730
1769              
1770             # Extract components from "filepath!SHEETNAME" specifiers
1771             my $path = filepath_from_spec("/path/to/spreasheet.xls!Sheet1")
1772             my $sheetname = sheetname_from_spec("/path/to/spreasheet.xls!Sheet1")
1773              
1774             # Parse a csv file with sane options
1775             my $csv = Text::CSV->new({ @sane_CSV_read_options, eol => $hash->{eol} })
1776             or die "ERROR: ".Text::CSV->error_diag ();
1777             my @rows
1778             while (my $F = $csv->getline( $infh )) {
1779             push @rows, $F;
1780             }
1781             close $infh or die "Error reading ", $hash->csvpath(), ": $!";
1782              
1783             # Write a csv file with sane options
1784             my $ocsv = Text::CSV->new({ @sane_CSV_write_options })
1785             or die "ERROR: ".Text::CSV->error_diag ();
1786             open my $outfh, ">:encoding(utf8)", $outpath
1787             or die "$outpath: $!";
1788             foreach (@rows) { $ocsv->print($outfh, $_) }
1789             close $outfh or die "Error writing $outpath: $!";
1790              
1791             =head1 DESCRIPTION
1792              
1793             Convert between CSV and spreadsheet files using external tools, plus some utility functions
1794              
1795             Currently this uses LibreOffice or OpenOffice (whatever is installed). An external tool is not needed when only CSV files are involved.
1796              
1797             =head2 $hash = OpenAsCsv INPUT
1798              
1799             =head2 $hash = OpenAsCsv inpath => INPUT, sheetname => SHEETNAME, ...
1800              
1801             This is a thin wrapper for C<convert_spreadsheet> followed by C<open>
1802              
1803             If a single argument is given it specifies INPUT; otherwise all arguments must
1804             be specified as key => value pairs, and may include any options supported
1805             by C<convert_spreadsheet>.
1806              
1807             INPUT may be a csv or spreadsheet workbook path; if a spreadsheet,
1808             then a single "sheet" is converted, specified by either a !SHEETNAME suffix
1809             in the INPUT path, a separate C<< sheetname => SHEETNAME >> option,
1810             or unspecified to extract the only sheet (croaks if there is more than one).
1811              
1812             The resulting file handle refers to a guaranteed-seekable CSV file;
1813             this will either be a temporary file (auto-removed at process exit),
1814             or the original INPUT if it was already a seekable csv file.
1815              
1816             RETURNS: A ref to a hash containing the following:
1817              
1818             {
1819             fh => the resulting open file handle
1820             csvpath => the path {fh} refers to, which might be a temporary file
1821             sheetname => sheet name if the input was a spreadsheet
1822             }
1823              
1824             =head2 convert_spreadsheet INPUT, cvt_to=>suffix, OPTIONS
1825              
1826             =head2 convert_spreadsheet INPUT, cvt_to=>"csv", allsheets => 1, OPTIONS
1827              
1828             Convert CSV to spreadsheet or vice-versa, or transcode CSV to CSV.
1829              
1830             RETURNS: A ref to a hash containing:
1831              
1832             {
1833             outpath => path to the output file (or directory with 'allsheets')
1834             (a temporary file/dir or as you specified in OPTIONS).
1835              
1836             encoding => the encoding used when writing .csv files
1837             }
1838              
1839             INPUT is the input file path; it may be a separate first argument as
1840             shown above, or else included in OPTIONS as C<< inpath =E<gt> INPUT >>.
1841              
1842             If C<outpath =E<gt> OUTPATH> is specifed then results are I<always> saved
1843             to that path. With C<allsheets> this is a directory, which will be created
1844             if necessary.
1845              
1846             If C<outpath> is NOT specified in OPTIONS then, with one exception,
1847             results are saved to a temporary file or directory and that path is returned
1848             as C<outpath> in the result hash.
1849             The exception is if no conversion is necessary
1850             (i.e. C<cvt_from> is the same as C<cvt_to>), when the
1851             input file itself is returned as C<outpath>.
1852              
1853             In all cases C<outpath> in the result hash points to the results.
1854              
1855             C<cvt_to> or C<cvt_from> are filename suffixes (sans dot)
1856             e.g. "csv", "xlsx", etc., and need not be specified when indicated by
1857             C<outpath> or INPUT parameters.
1858              
1859             OPTIONS may also include:
1860              
1861             =over 4
1862              
1863             =item sheetname => "sheet name"
1864              
1865             The workbook 'sheet' name used when reading or writing a spreadsheet.
1866             An input sheet name may also be specified as "!sheetname" appended to
1867             the INPUT path.
1868              
1869             =item allsheets => BOOL
1870              
1871             B<All> sheets in the input
1872             are converted to separate .csv files named "SHEETNAME.csv" in
1873             the 'outpath' directory. C<< cvt_to =E<gt> 'csv' >> is also requred.
1874              
1875             =item input_encoding => ENCODING
1876              
1877             Specifies the encoding of INPUT if it is a csv file.
1878              
1879             ENCODING may be a comma-separated list of encoding
1880             names which will be tried in the order until one seems to work.
1881             If only one is specified it will be used without trying it first.
1882             The default is "UTF-8,windows-1252".
1883              
1884             =item output_encoding => ENCODING
1885              
1886             Used when writing csv file(s), defaults to 'UTF-8'.
1887              
1888             =item col_formats => [...]
1889              
1890             This specifies how CSV data is imported into a spreadsheet. Each element
1891             of the array may contain:
1892              
1893             undef, "standard" or "" (LibreOffice will auto-detect)
1894             "text" (imported as unmolested text)
1895             "MM/DD/YY",
1896             "DD/MM/YY",
1897             "YY/MM/DD",
1898             "ignore" (do not import this column)
1899              
1900             Elements may also contain the numeric format codes defined by LibreOffice
1901             at L<https://wiki.documentfoundation.org/Documentation/DevGuide/Spreadsheet_Documents#Filter_Options_for_the_CSV_Filter>
1902              
1903             B<Automatic format detection:>
1904             Input CSV data is pre-scanned to auto-detect column formats
1905             as much as possible. This usually works well as long as dates are
1906             represented unambiguously, e.g. "2021-01-01" or "Jan 1, 2023".
1907              
1908             Specifically, this detects leading zeroes such as in U.S. Zip Codes,
1909             and MM/DD/YY or DD/MM/YY dates when a DD happens to be more than 12.
1910              
1911             =item verbose => BOOL
1912              
1913             =back
1914              
1915             =head3 B<'binmode' Argument For Reading result CSVs>
1916              
1917             It is not possible to control the line-ending style in output CSV files,
1918             but the following incantation will correctly read either DOS/Windows (CR,LF)
1919             or *nix (LF) line endings properly, i.e. as a single \n:
1920              
1921             open my $fh, "<", $resulthash->{outpath};
1922             my $enc = $resulthash->{encoding};
1923             binmode($fh, ":raw:encoding($enc):crlf");
1924              
1925             =head2 @sane_CSV_read_options
1926              
1927             =head2 @sane_CSV_write_options
1928              
1929             These contain options you will always want to use with
1930             S<<< C<< Text::CSV->new() >> >>>.
1931             Specifically, quotes and embedded newlines are handled correctly.
1932              
1933             Not exported by default.
1934              
1935             =head2 cx2let COLUMNINDEX
1936              
1937             =head2 let2cx LETTERCODE
1938              
1939             Functions which translate between spreadsheet-column
1940             letter codes ("A", "B", etc.) and 0-based column indicies.
1941             Not exported by default.
1942              
1943             =head2 filepath_from_spec EXPR
1944              
1945             =head2 sheetname_from_spec EXPR
1946              
1947             Functions which decompose strings containing a spreadsheet path and possibly sheetname
1948             suffix, of the form "FILEPATH!SHEETNAME", "FILEPATH|||SHEETNAME", or "FILEPATH[SHEETNAME]".
1949             C<sheetname_from_spec> returns C<undef> if the input does not have a
1950             a sheetname suffix.
1951             Not exported by default.
1952              
1953             =head2 form_spec_with_sheetname(PATH, SHEENAME)
1954              
1955             Composes a combined string in a "preferred" format (currently "PATH!SHEETNAME").
1956             Not exported by default.
1957              
1958             =head1 Testing if LibreOffice etc. is Installed
1959              
1960             =head2 $bool = can_cvt_spreadsheets();
1961              
1962             =head2 $bool = sub can_extract_allsheets();
1963              
1964             =head2 $bool = can_extract_named_sheet();
1965              
1966             These feature-test functions return false if the corresponding operations
1967             are not possible because LibreOffice (or, someday gnumeric) is not installed
1968             or is an older version which does not have needed capabilities.
1969              
1970             =head2 $path = openlibreoffice_path();
1971              
1972             Returns the detected path of I<soffice> (Apache Open Office or Libre Office)
1973             or undef if not found.
1974              
1975             These are not exported by default.
1976              
1977             =head1 SEE ALSO
1978              
1979             L<Spreadsheet::Edit> and L<Text::CSV>
1980              
1981             =cut
1982