File Coverage

blib/lib/Spreadsheet/Edit.pm
Criterion Covered Total %
statement 698 1505 46.3
branch 187 678 27.5
condition 74 346 21.3
subroutine 133 222 59.9
pod 48 61 78.6
total 1140 2812 40.5


line stmt bran cond sub pod time code
1             # License: Public Domain or CC0
2             # See https://creativecommons.org/publicdomain/zero/1.0/
3             # The author, Jim Avera (jim.avera at gmail) has waived all copyright and
4             # related or neighboring rights. Attribution is requested but is not required.
5              
6             # Pod documentation is below (use perldoc to view)
7              
8 3     3   3866063 use 5.18.0; # lexical subs with bug we have worked around
  3         28  
9 3     3   18 use strict; use warnings FATAL => 'all'; use utf8;
  3     3   7  
  3     3   88  
  3         15  
  3         6  
  3         127  
  3         16  
  3         6  
  3         20  
10 3     3   87 no warnings qw(experimental::lexical_subs);
  3         6  
  3         136  
11 3     3   20 use feature qw(state lexical_subs current_sub);
  3         4  
  3         381  
12              
13             package Spreadsheet::Edit;
14              
15             # Allow "use <thismodule> <someversion>;" in development sandbox to not bomb
16 3     3   37 { no strict 'refs'; ${__PACKAGE__."::VER"."SION"} = 1999.999; }
  3         6  
  3         458  
17              
18             our $VERSION = '1000.009'; # VERSION from Dist::Zilla::Plugin::OurPkgVersion
19             our $DATE = '2023-09-23'; # DATE from Dist::Zilla::Plugin::OurDate
20              
21             # FIXME: cmd_nesting does nothing except prefix >s to log messages.
22             # Shouldn't it skip that many "public" call frames???
23              
24             # TODO FIXME: Integrate with Spreadsheet::Read and provide a formatting API
25              
26             # TODO: Allow & support undef cell values (see Text::CSV_XS), used to
27             # represent "NULL" when interfacing with database systems.
28             # OTOH, this conflicts with failed optional alias keys
29              
30             # TODO: Add some way to exit an apply() early, e.g. return value?
31             # or maybe provide an abort_apply(resultval) function
32             # which throws an exception we can catch in apply?
33              
34             # TODO: Use Tie::File to avoid storing entire sheet in memory
35             # (requires seekable, so must depend on OpenAsCsv
36             # copying "-" to a temp file if it is not a plain file).
37              
38             #########################################
39             # sub NAMING CONVENTION:
40             # sub public_method_or_function
41             # sub _internal_method
42             # sub __internal_function
43             #########################################
44              
45             # If these globals are *defined* then the corresponding
46             # (downcased) options default accordingly when new sheets are created.
47             our ($Debug, $Verbose, $Silent);
48              
49             ############################## Exporting stuff ############################
50             #
51 3     3   1627 use Exporter::Tiny 1.001_000 (); # just to require version with non-sub generator support
  3         15440  
  3         105  
52 3     3   22 use parent "Exporter::Tiny"; # make us be a derived class
  3         6  
  3         24  
53              
54             require mro; # makes next::can available
55              
56             sub import {
57             # copied from List::Util
58             # (RT88848) Touch the caller's $a and $b, to avoid the warning of
59             # 'Name "main::a" used only once: possible typo'
60 5     5   504 my $pkg = caller;
61 3     3   313 no strict 'refs';
  3         8  
  3         539  
62 5         11 ${"${pkg}::a"} = ${"${pkg}::a"};
  5         16  
  5         27  
63 5         9 ${"${pkg}::b"} = ${"${pkg}::b"};
  5         14  
  5         18  
64              
65 5         11 my $this = $_[0];
66 5         9 goto &{ $this->next::can }; # see 'perldoc mro'
  5         37  
67             #goto &maybe::next::method ; ???
68             }
69              
70 3     3   23 use Symbol qw/gensym/;
  3         7  
  3         4682  
71              
72             our @EXPORT = qw(
73             alias apply apply_all apply_exceptrx apply_torx attributes
74             spectocx data_source delete_col delete_cols delete_row delete_rows
75             transpose join_cols join_cols_sep move_col
76             move_cols insert_col insert_cols insert_row insert_rows new_sheet only_cols
77             options read_spreadsheet rename_cols reverse_cols
78             sheet sheetname sort_rows split_col tie_column_vars title2ident title_row
79             title_rx unalias write_csv write_spreadsheet );
80              
81             my @stdvars = qw( $title_rx $num_cols
82             @rows @linenums @meta_info %colx %colx_desc $title_row
83             $rx $linenum @crow %crow );
84              
85             our @EXPORT_OK = (@stdvars, qw/logmsg cx2let let2cx fmt_sheet/);
86              
87             our %EXPORT_TAGS = (
88             STDVARS => [@stdvars],
89             FUNCS => [@EXPORT],
90             default => [':FUNCS'],
91             DEFAULT => [':default'],
92             #all => [qw(:STDVARS :FUNCS cx2let let2cx logmsg)],
93             all => [qw(:STDVARS :FUNCS)],
94             );
95              
96             # Although the magic globals $row, $rx and friends are "imported" by 'use',
97             # they can not simply alias variables in Spreadsheet::Edit becuase they
98             # need to be tied with parameters specific to the user's package
99             # so that each package has its own 'current sheet'.
100             #
101             # To accomplish this, we use Exporter:Tiny's "generator" mechanism to
102             # create a new, unique variables tied appropriately for each import.
103             # See perldoc Exporter::Tiny::Manual::QuickStart .
104             #
105             # If "methods" named _generate_SUBNAME, _generateScalar_SCALARNAME,
106             # _generateArray_ARRAYNAME or _generateHash_HASHNAME exist in the
107             # exporting package (that's us), then they are called to obtain
108             # a ref to an object to export as SUBNAME, SCALARNAME, ARRAYNAME or HASHNAME.
109             #
110             # For example _generateScalar_rx() is called to get a ref to an $rx variable.
111              
112             sub __gen_x {
113 30     30   88 my ($myclass, $sigilname, $args, $globals, # supplied by Exporter::Tiny
114             $Type, $helpersub, @args) = @_;
115 30 100       82 my $sigl = $Type eq 'Hash' ? '%' : $Type eq 'Array' ? '@' : '$';
    100          
116 30         89 my $tieclassname = 'Tie::Indirect::'.$Type;
117 30         44 my $ref = *{gensym()}{uc($Type)}; # e.g. "ARARY"
  30         99  
118             # e.g. tie $$ref, 'Tie::Indirect::Scalar', \&_scal_tiehelper, ...
119 30         3108 eval "tie ${sigl}\$ref, \$tieclassname, \$helpersub,
120             \$globals->{into}, \$sigilname, \@args";
121 30 50       613 die $@ if $@;
122 30         168 $ref
123             }
124             sub __gen_scalar {
125 6     6   22 my ($myclass, $sigilname, $args, $globals,
126             $canon_ident, $onlyinapply) = @_;
127 6         20 __gen_x($myclass, $sigilname, $args, $globals,
128             "Scalar", \&Spreadsheet::Edit::_scal_tiehelper,
129             $canon_ident, $onlyinapply);
130             }
131 3     3   287 sub _generateScalar_num_cols { __gen_scalar(@_, "num_cols") }
132 3     3   298 sub _generateScalar_rx { __gen_scalar(@_, "current_rx", 1) }
133              
134             #sub _generateScalar_title_rx { __gen_scalar(@_, "title_rx") }
135             sub _generateScalar_title_rx {
136             __gen_x(@_, "Scalar",
137             sub{
138 0     0   0 my ($mutating, $pkg, $uvar, $onlyinapply)=@_;
139 0         0 my $sheet = &Spreadsheet::Edit::__getsheet(@_);
140             \$$sheet->{title_rx}
141 0     3   0 }, 0 # onlyinapply
  3         5186  
142             )
143             }
144              
145             sub __gen_aryelem {
146 6     6   23 my ($myclass, $sigilname, $args, $globals,
147             $index_ident, $array_ident, $onlyinapply) = @_;
148             # N.B. _aryelem_tiehelper has special logic for 'current_rx' and 'title_rx'
149 6         25 __gen_x($myclass, $sigilname, $args, $globals,
150             "Scalar", \&Spreadsheet::Edit::_aryelem_tiehelper,
151             $index_ident, $array_ident, $onlyinapply);
152             }
153 3     3   256 sub _generateScalar_title_row { __gen_aryelem(@_, "title_rx", "rows") }
154 3     3   257 sub _generateScalar_linenum { __gen_aryelem(@_, "current_rx", "linenums", 1) }
155              
156             sub __gen_hash {
157 6     6   21 my ($myclass, $sigilname, $args, $globals,
158             $field_ident, $onlyinapply) = @_;
159 6         19 __gen_x($myclass, $sigilname, $args, $globals,
160             "Hash", \&Spreadsheet::Edit::_refval_tiehelper,
161             $field_ident, $onlyinapply, 0); # mutable => 0
162             }
163 3     3   260 sub _generateHash_colx { __gen_hash(@_, "colx", 0) }
164 3     3   263 sub _generateHash_colx_desc { __gen_hash(@_, "colx_desc", 0) }
165              
166             sub __gen_array{
167 9     9   27 my ($myclass, $sigilname, $args, $globals,
168             $field_ident, $onlyinapply, $mutable) = @_;
169 9         30 __gen_x($myclass, $sigilname, $args, $globals,
170             "Array", \&Spreadsheet::Edit::_refval_tiehelper,
171             $field_ident, $onlyinapply, $mutable);
172             }
173 3     3   265 sub _generateArray_rows { __gen_array(@_, "rows", 0, 1) }
174              
175             # Currently @linenums is not mutable but maybe it should be?
176 3     3   260 sub _generateArray_linenums { __gen_array(@_, "linenums", 0) }
177              
178             ## FIXME: is meta_info still valid?
179 3     3   298 sub _generateArray_meta_info { __gen_array(@_, "meta_info", 0) }
180              
181             sub __get_currentrow {
182 0     0   0 my ($mutating, $pkg, $uvar) = @_;
183             # Ignore mutating, as it applies to the element not the container
184 0         0 my $sheet = &Spreadsheet::Edit::__getsheet(0, $pkg, $uvar, 1);
185             # Returns the dual-typed ref (Magicrow) for the current row
186 0         0 $$sheet->{rows}->[$$sheet->{current_rx}]
187             }
188             sub _generateArray_crow { # @crow aliases the current row during apply
189 3     3   258 my ($myclass, $sigilname, $args, $globals) = @_;
190 3         5 my $aref = *{gensym()}{ARRAY};
  3         13  
191             tie @$aref, 'Tie::Indirect::Array',
192 3         51 \&__get_currentrow, $globals->{into}, $sigilname ;
193 3         36 $aref
194             }
195             sub _generateHash_crow { # %crow indexes cells in the current row during apply
196 3     3   228 my ($myclass, $sigilname, $args, $globals) = @_;
197 3         6 my $href = *{gensym()}{HASH};
  3         10  
198             tie %$href, 'Tie::Indirect::Hash',
199 3         49 \&__get_currentrow, $globals->{into}, $sigilname ;
200 3         36 $href
201             }
202             #
203             ########################### End of Exporting stuff ##########################
204              
205 3         15 use Spreadsheet::Edit::Log qw/log_call fmt_call log_methcall fmt_methcall oops/,
206 3     3   1510 ':btw=SE${lno}:' ;
  3         7  
207              
208 3     3   22 use Data::Dumper ();
  3         6  
  3         103  
209 3     3   19 use Data::Dumper::Interp 6.007 qw/:all/;
  3         59  
  3         18  
210              
211 3     3   77285 use Carp;
  3         9  
  3         414  
212             our @CARP_NOT = qw(Spreadsheet::Edit
213             Spreadsheet::Edit::IO
214             Tie::Indirect::Array Tie::Indirect::Hash
215             Tie::Indirect::Scalar
216             );
217              
218 3     3   25 use Scalar::Util qw(looks_like_number openhandle reftype refaddr blessed);
  3         8  
  3         221  
219 3     3   20 use List::Util qw(min max sum0 first any all pairs pairgrep);
  3         6  
  3         213  
220 3     3   828 use File::Temp qw(tempfile tempdir);
  3         18113  
  3         207  
221 3     3   22 use File::Basename qw(basename dirname fileparse);
  3         7  
  3         159  
222 3     3   20 use File::Find ();
  3         6  
  3         76  
223 3     3   17 use Symbol qw(gensym);
  3         5  
  3         141  
224 3     3   19 use POSIX qw(INT_MAX);
  3         21  
  3         34  
225 3     3   729 use Guard qw(scope_guard);
  3         489  
  3         225  
226              
227             require Tie::Indirect; # OUR CUSTOM STUFF: stop using it?
228              
229 3     3   5041 use Text::CSV 1.90_01; # 1st version with known_attributes()
  3         55623  
  3         192  
230 3         897 use Spreadsheet::Edit::IO qw(
231             OpenAsCsv @sane_CSV_read_options @sane_CSV_write_options
232             convert_spreadsheet
233 3     3   2045 sheetname_from_spec filepath_from_spec form_spec_with_sheetname);
  3         14  
234              
235 3   33     330 use constant _CALLER_OVERRIDE_CHECK_OK =>
236             (defined(&Carp::CALLER_OVERRIDE_CHECK_OK)
237 3     3   28 && &Carp::CALLER_OVERRIDE_CHECK_OK);
  3         6  
238              
239             # sub __mytraceback() { ... } archived in commit 482e09b6009
240              
241 3     3   23 use constant DEFAULT_WRITE_ENCODING => 'UTF-8';
  3         7  
  3         28313  
242             #use constant DEFAULT_READ_ENCODINGS => 'UTF-8,windows-1252';
243              
244             # This global is used by logmsg() to infer the current sheet if an apply is
245             # active, even if logmsg is called indirectly via another pkg
246             our $_inner_apply_sheet; # see &_apply_to_rows
247              
248             # The "current sheet", to which tied globals refer in any given package.
249             our %pkg2currsheet;
250              
251 24     24   50 sub __looks_like_aref($) { eval{ 1+scalar(@{$_[0]}) } } #actual or overloaded
  24         50  
  24         66  
252              
253             # Utility FUNCTIONS
254             #
255             sub to_array(@) { @_ != 1 ? @_ # 0 or multiple values
256 0         0 : ref($_[0]) eq "ARRAY" ? @{$_[0]}
257 4 0   4 0 24 : ref($_[0]) eq "HASH" ? @{ %{$_[0]} } # (key, value, ...)
  0 0       0  
  0 50       0  
258             : $_[0] # just 1 value
259             }
260 0     0 0 0 sub to_aref(@) { [ to_array(@_) ] }
261 0 0   0 0 0 sub to_wanted(@) { goto &to_array if wantarray; goto &to_aref }
  0         0  
262              
263             sub to_hash(@) {
264 4 50 33 4 0 28 @_==1 && ref($_[0]) eq "HASH" ? $_[0] :
    50          
265             (@_ % 2)!=0 ? croak("odd arg count, expecting key => value pairs") :
266             { to_array(@_) }
267             }
268              
269             # Currently these are also exported by Spreadsheet::Edit::IO
270             # completely redundant!
271             sub cx2let(_) { # default arg is $_
272 52     52 0 83 my $cx = shift;
273 52         74 my $ABC="A"; ++$ABC for (1..$cx);
  52         112  
274 52         141 return $ABC
275             }
276             sub let2cx(_) {
277 0     0 0 0 my $ABC = shift;
278 0         0 my $n = ord(substr($ABC,0,1,"")) - ord('A');
279 0         0 while (length $ABC) {
280 0         0 my $letter = substr($ABC,0,1,"");
281 0         0 $n = (($n+1) * 26) + (ord($letter) - ord('A'));
282             }
283 0         0 return $n;
284             }
285              
286             # Produce the "automatic alias" identifier for an arbitrary title
287             sub title2ident($) {
288 32     32 0 49 local $_ = shift;
289 32         62 s/^\s+//; s/\s+$//; s/\W/_/g; s/^(?=\d)/_/;
  32         61  
  32         59  
  32         58  
290 32         68 $_
291             }
292              
293             # Format list as "word,word,..." without parens ; Non-barewords are "quoted".
294 0     0   0 sub __fmt_uqlist(@) { join(",",map{quotekey} @_) }
  0         0  
295 0     0   0 sub __fmt_uqarray(@) { "(" . &__fmt_uqlist . ")" }
296              
297             # Format list as without parens with barewords in qw/.../
298             sub __fmt_uqlistwithqw(@) {
299 0     0   0 my $barewords;
300 0         0 my $s = "";
301 0         0 foreach (map{quotekey} @_) {
  0         0  
302 0 0       0 if (/^\w/) {
303 0 0       0 if ($barewords++) {
304 0         0 $s .= " ";
305             } else {
306 0 0       0 $s .= ", " if $s;
307 0         0 $s .= "qw/";
308             }
309             } else {
310 0 0       0 if ($barewords) {
311 0         0 $s .= "/";
312 0         0 $barewords = 0;
313             }
314 0 0       0 $s .= ", " if $s;
315             }
316 0         0 $s .= $_;
317             }
318 0 0       0 $s .= "/" if $barewords;
319 0         0 $s
320             }
321 0     0   0 sub __fmt_uqarraywithqw(@) { "(" . &__fmt_uqlistwithqw . ")" }
322              
323             # Format list of pairs as "key1 => val1, key2 => val2, ..." without parens
324             sub __fmt_pairlist(@) {
325 0     0   0 my $result = "";
326 0         0 while (@_) {
327 0 0       0 confess "Odd arg count, expecting key => value pairs" if @_==1;
328 0 0       0 $result .= ", " if $result;
329 0         0 my $key = shift @_;
330 0         0 my $val = shift @_;
331 0         0 $result .= quotekey($key)." => ".vis($val);
332             }
333             $result
334 0         0 }
335             sub __fmt_pairs(@) {
336 0     0   0 __fmt_pairlist( map{ @$_ } @_ );
  0         0  
337             }
338              
339             # Concatenate strings separated by spaces, folding as necessary
340             # (strings are never broken; internal newlines go unnoticed).
341             # All lines (including the first) are indented the specified number of
342             # spaces. Explicit line-breaks may be included as "\n".
343             # A final newline is *not* included unless the last item ends with "\n".
344             sub __fill($;$$) {
345 0     0   0 my ($items, $indent, $foldwidth) = @_;
346 0   0     0 $indent //= 4;
347 0   0     0 $foldwidth //= 72;
348 0         0 my $buf = "";
349 0         0 my $llen = 0;
350 0         0 foreach (@$items) {
351 0 0 0     0 if ($_ eq "\n" or
      0        
352             ($llen > $indent && ($llen + length($_)) > $foldwidth)) {
353 0         0 $buf .= "\n";
354 0         0 $llen = 0;
355 0 0       0 next if $_ eq "\n";
356             }
357 0 0       0 if ($llen == 0) {
358 0         0 $buf .= (" " x $indent);
359 0         0 $llen = $indent;
360             } else {
361 0 0       0 if (substr($buf,-1) =~ /\S/) {
362 0         0 $buf .= " ";
363 0         0 ++$llen;
364             }
365             }
366 0         0 $buf .= $_;
367 0         0 $llen += length();
368             }
369 0         0 $buf;
370             }
371              
372             sub __fmt_colspec_cx($$) {
373             # "cx NN" or "COLSPEC [cx NN]" or "COLSPEC (NOT DEFINED)" if undef cx
374 0     0   0 my ($colspec, $cx) = @_;
375 0 0       0 if (ref($colspec) eq "Regexp") {
376 0         0 state $delimsets = [
377             [qw(/ /)], [qw({ })], [qw([ ])], [qw<( )>], [qw(< >)], [qw(« »)] ];
378 0         0 for (@$delimsets) {
379 0         0 my ($left, $right) = @$_;
380 0 0 0     0 if (index($colspec,$left)<0 && index($colspec,$right)<0) {
381 0         0 $colspec = "qr${left}${colspec}${right}";
382 0         0 last;
383             }
384             }
385             } else {
386 0         0 $colspec = visq($colspec);
387             }
388 0 0       0 return "$colspec (NOT DEFINED)"
389             if ! defined $cx;
390 0 0       0 $colspec eq "$cx" ? "cx $cx" : "$colspec [cx $cx]"
391             }
392             sub __fmt_cx($) {
393 28     28   50 my ($cx) = @_;
394 28 50       78 defined($cx) ? "cx $cx=".cx2let($cx) : "(undefined)"
395             }
396              
397             # Format %colx keys "intelligently". cx values are not shown for keys which are
398             # absolute column refs. Keys with undef values (from alias {optional => 1})
399             # are omitted since they are not currently valid. A final newline IS included.
400             sub _fmt_colx(;$$) {
401             my $self = shift;
402             my ($indent, $foldwidth) = @_;
403             my ($colx, $num_cols) = @$$self{qw{colx num_cols}};
404             # copy %$colx omitting keys with undef cx
405             my %hash = map{ defined($colx->{$_}) ? ($_ => $colx->{$_}) : () } keys %$colx;
406             my sub sortbycx(@) { sort { ($colx->{$a}//-1) <=> ($colx->{$b}//-1) } @_ }
407             my sub subset($) { # format items, deleting from %hash
408             my $specs = shift;
409             my (@items, $curr, $curr_desc);
410             my $curr_cx = -1;
411             my sub flush() {
412             return unless $curr_cx >= 0;
413             push @items, $curr.$curr_desc;
414             $curr = $curr_desc = undef; ##DEBUGGING
415             $curr_cx = -1;
416             }
417             my sub additem($$) {
418             (local $_, my $cx) = @_;
419             flush() if $curr_cx != $cx;
420             if ($curr_cx >= 0) {
421             $curr .= ",".quotekey($_);
422             } else {
423             $curr_cx = $cx;
424             $curr = quotekey($_);
425             my $misfit = (/^[A-Z]{1,2}$/ && $colx->{$_} != let2cx($_))
426             || (/^\d+$/ && $colx->{$_} != $_)
427             || (/^\D../) # show titles with cx too
428             ;
429             $curr_desc = $misfit ? "(cx ".vis($hash{$_}).")" : "";
430             }
431             }
432             foreach (@$specs) {
433             if (ref $_) {
434             push @items, $$_; # \"string" means insert "string" literally
435             } else {
436             # Work around old Perl lexical sub limitations...
437             my $item=$hash{$_};
438             oops(dvis '$_ $specs hash=',hvis(%hash)) unless defined $item;
439             additem($_, $item);
440             delete $hash{$_} // oops;
441             }
442             }
443             flush();
444             push @items, "\n" if @items; # wrap before next subset, or at end
445             @items
446             }
447             my @ABCs = subset [ map{ my $A = cx2let($_);
448             u($hash{$A}) eq $_ ? $A : \" "
449             } 0..$num_cols-1 ];
450              
451             # More lexical sub bug work-arounds...
452             my @ss1 = subset [sortbycx grep{ /^(=.*\D)\w+$/ } keys %hash]; # normal titles
453             my @ss2 = subset [sortbycx grep{ /^\d+$/ } keys %hash]; # numeric titles
454             my @ss3 = subset [sortbycx keys %hash]; # oddities
455             __fill [
456             @ABCs,
457             @ss1,
458             @ss2,
459             @ss3,
460             ], $indent, $foldwidth;
461             }
462              
463             # Is a title a special symbol or looks like a cx number?
464             sub __unindexed_title($$) {
465 14     14   26 my ($title, $num_cols) = @_;
466 14 50       30 oops unless defined $title;
467 14 50 33     139 $title eq ""
      33        
      33        
      33        
468             || $title eq '^'
469             || $title eq '$'
470             || ( ($title =~ /^[1-9]\d*$/ || $title eq "0") # not with leading zeros
471             && $title <= $num_cols )
472             }
473             sub _unindexed_title { #method for use by tests
474 0     0   0 my $self = shift;
475 0         0 __unindexed_title(shift(), $$self->{num_cols});
476             }
477              
478             # Return (normals, unindexed) where each is [title => cx, ...] sorted by cx
479             sub _get_usable_titles {
480 6     6   12 my $self = shift;
481 6         18 my ($rows, $title_rx, $num_cols) = @$$self{qw{rows title_rx num_cols}};
482 6   33     40 my $title_row = $rows->[$title_rx // oops];
483 6         19 my @unindexed;
484             my @normals;
485 6         0 my %seen;
486 6         18 for my $cx (0 .. $num_cols-1) {
487 14         34 my $title = $title_row->[$cx];
488 14 50       38 next if $title eq "";
489 14 50       42 if ($seen{$title}++) {
490 0 0       0 $self->_carponce("Warning: Non-unique title ", visq($title), " will not be usable for COLSPEC\n") unless $$self->{silent};
491 0         0 @normals = grep{ $_->[0] ne $title } @normals;
  0         0  
492 0         0 @unindexed = grep{ $_->[0] ne $title } @unindexed;
  0         0  
493 0         0 next;
494             }
495 14 50       34 if (__unindexed_title($title, $num_cols)) {
496 0         0 push @unindexed, [$title, $cx];
497             } else {
498 14         46 push @normals, [$title, $cx];
499             }
500             }
501 9         45 [sort { $a->[1] <=> $b->[1] } @normals],
502 6         30 [sort { $a->[1] <=> $b->[1] } @unindexed]
  0         0  
503             }
504              
505             # Non-OO api: Explicitly create a new sheet and make it the "current sheet".
506             # Options (e.g. to specify initial content) may be specified in an
507             # initial {OPTHASH} and/or as linear key => value pairs.
508             #
509             # Note: Most functions automatically create an empty sheet if no sheet
510             # exists, so this is only really needed when using more than one sheet
511             # or if you want to initialize a sheet from data in memory.
512             # N.B. the corresponding OO interface is Spreadsheet::Edit->new(...)
513             #
514             sub new_sheet(@) {
515 2     2 1 9355 my $opthash = &__opthash;
516 2         7 my %opts = (%$opthash, %{to_hash(@_)}); # new() merges these anyway
  2         7  
517              
518 2         7 my ($userpkg, $fn, $lno, $subname) = @{ __filter_frame(__usercall_info()) };
  2         6  
519 2 50       11 $userpkg = delete $opts{package} if exists $opts{package};
520              
521 2 50       9 my $pkgmsg = $opts{package} ? " [for pkg $userpkg]" : "";
522 2 50 33     15 croak "Invalid 'package' ",u($userpkg),"\n"
523             unless defined($userpkg) && $userpkg =~ /^[a-zA-Z][:\w]*$/a;
524              
525 2   33     7 $opts{data_source} ||= "Created at ${fn}:$lno by $subname";
526              
527 2         6 my $sheet = __silent_new(\%opts);
528              
529             log_call [$opthash, @_], [\(fmt_sheet($sheet), \$pkgmsg)]
530 2 50       7 if $$sheet->{verbose};
531              
532 2         11 $pkg2currsheet{$userpkg} = $sheet
533             }
534              
535             # logmsg() - Concatenate strings to form a "log message", possibly
536             # prefixed with a description of a "focus" sheet and optionally
537             # a specific row. A final \n is appended if needed.
538             #
539             # The "focus" sheet and row, if any, are determined as follows:
540             #
541             # If the first argument is a sheet object, [sheet_object],
542             # [sheet_object, rx], or [sheet_object, undef] then the indicated
543             # sheet and (optionally) row are used. Note that if called as a method
544             # the first arg will be the sheet object.
545             #
546             # Otherwise the first arg is not special and is included in the message.
547             #
548             # If no sheet is identified above, then the caller's package active
549             # sheet is used, if any.
550             #
551             # If still no sheet is identified, then the sheet of the innermost apply
552             # currently executing (anywhere up the stack) is used, if any; this sheet
553             # is internally saved in a global by the apply* methods.
554             #
555             # If a sheet is identified but no specific rx, then the
556             # "current row" of an active apply on that sheet is used, if any.
557             #
558             # If a focus sheet or sheet & row were identified, then the caller-supplied
559             # message is prefixed by "(<description>):" or "(row <num> <description>):"
560             # where <description> comes from:
561             #
562             # 1) If the sheet attribute {logmsg_pfx_gen} is defined to a subref,
563             # the sub is called and all returned items other than undef are
564             # concatenated (any undefs in the returned list are ignored); otherwise
565             #
566             # 2) The "sheetname" property is used, if defined; otherwise
567             #
568             # 3) the "data_source" property is used, which defaults to the name of the
569             # spreadsheet read by read_spreadsheet().
570             #
571             # FIXME: I should either rename logmsg_pfx_gen as logmsg_sheetdesc_gen
572             # to reflect that it only generated the sheet-description part,
573             # or else make prefix generators produce the entire message prefix
574             # including any row number.
575             #
576             sub _default_pfx_gen($$) {
577 274     274   573 my ($sheet, $rx) = @_;
578 274 50       1316 confess "bug" unless ref($sheet) =~ /^Spreadsheet::Edit\b/;
579 274 50       673 ($sheet->sheetname() || $sheet->data_source())
580             }
581             sub logmsg(@) {
582 285     285 1 291011 my ($sheet, $rx);
583 285 100 100     1466 if (@_ > 0 && ref($_[0])) {
584 208 100 33     829 if (ref($_[0]) =~ /^Spreadsheet::Edit\b/) {
    50 33        
585 24         54 $sheet = shift;
586             }
587             elsif (ref($_[0]) eq "ARRAY"
588 184         962 && @{ $_[0] } <= 2
589             && ref($_[0]->[0])."" =~ /^Spreadsheet::Edit\b/) {
590 184         294 ($sheet, $rx) = @{ shift @_ };
  184         484  
591             }
592             }
593 285         647 my $msgstr = join("", grep{defined} @_);
  320         973  
594 285 100       617 if (! defined $sheet) {
595 77         211 $sheet = $pkg2currsheet{scalar(caller)};
596             }
597 285 100       590 if (! defined $sheet) {
598 22         43 $sheet = $Spreadsheet::Edit::_inner_apply_sheet;
599             }
600 285 100       593 if (! defined $rx) {
601 125 100       282 $rx = eval{ $sheet->rx() } if defined($sheet);
  114         318  
602             }
603 285         464 my @prefix;
604 285 100       552 if (defined $sheet) {
605 274         550 push @prefix, "(";
606 274 100 100     812 if (defined($rx) && ($rx < 0 || $rx > $#{$sheet->rows})) {
      100        
607 80         251 push @prefix, "Row ".($rx+1)."[INVALID RX $rx] ";
608 80         126 $rx = undef; # avoid confusing user's prefix generator
609             } else {
610 194 100       583 push @prefix, "Row ".($rx+1)." " if defined($rx);
611             }
612 274   50     673 my $pfxgen = $sheet->attributes->{logmsg_pfx_gen} // \&_default_pfx_gen;
613 274         685 push @prefix, &$pfxgen($sheet, $rx), "): ";
614             }
615 285 100       877 my $suffix = ($msgstr =~ /\n\z/s ? "" : "\n");
616             # If the message string starts with newlines, put them before the prefix
617 285 50       705 if ($msgstr =~ s/\A(\n+)([^\n])/$2/s) {
618 0         0 unshift @prefix, $1;
619             }
620 285         528 return join "", grep{defined} @prefix, $msgstr, $suffix;
  1593         4524  
621             }
622              
623             #####################################################################
624             # Locate the nearest call to a public sub in the call stack.
625             # [Now *GENERIC*]
626             #
627             # A "public" sub means anything named starting with [a-z], irrespective
628             # of package, and not mentioned in $USERCALL_NOT.
629             # The assumption is that internal subs are named with an initial
630             # underscore or are ALLCAPS (e.g. for constants).
631             #
632             # RETURNS
633             # ([frame], [called args]) in array context
634             # [frame] in scalar context
635             #
636             # "frame" means caller(n) results:
637             # 0 1 2 3
638             # package filename linenum subname ...
639             #
640             sub __usercall_info(;$) {
641 15     15   45 for (my $lvl=1 ; ; ++$lvl) {
642 53         120 @DB::args = \$lvl if _CALLER_OVERRIDE_CHECK_OK; # see mytraceback()
643 53         81 my @frame = do{ package
644 53         477 DB; caller($lvl) };
645 53 50       147 oops dvis('$lvl @frame') unless defined($frame[0]);
646 53 100       310 if ( $frame[3] =~ /::([a-z][^:]*)/ ) {
647 15 100       154 return \@frame unless wantarray;
648 2         4 my @args;
649 2         5 my $hasargs = $frame[4];
650 2 50       5 if ($hasargs) {
651 2         3 eval{ @args = @DB::args };
  2         9  
652 2 50       8 @args=("<perl bug(?) prevented getting args>") if $@;
653             }
654 2         8 return (\@frame, \@args)
655             }
656             }
657             }
658              
659             sub __filter_frame($) { #clean-up/abbreviate for display purposes
660 5     5   12 my @frame = @{shift @_};
  5         19  
661 5         169 $frame[1] = basename $frame[1]; # filename
662 5         37 $frame[3] =~ s/.*:://; # subname
663             \@frame
664 5         28 }
665             sub __fn_ln_methname() {
666 3     3   7 @{ __filter_frame(__usercall_info()) }[1,2,3]; # (fn, lno, subname)
  3         19  
667             }
668              
669             sub __methname() {
670 0     0   0 (&__fn_ln_methname())[2]
671             }
672              
673             sub __find_userpkg() {
674 10     10   40 ${ __usercall_info() }[0];
  10         37  
675             }
676              
677             # This always returns the caller's caller's package but also
678             # checks that it is not an internal call, which should never happen
679             sub __callerpkg() {
680 1     1   9 my $pkg = (caller(1))[0];
681 1 50       7 oops if index($pkg,__PACKAGE__) == 0;
682 1         7 $pkg
683             }
684              
685             # Create a new object without allowing any logging. This is used when
686             # new() is called implicitly by something else and new's log messages
687             # might display an internal filename (namely Edit.pm).
688             #
689             # debug/verbose args are removed from the arguments passed to new()
690             # and put back into the object after it is created.
691             sub __silent_new(@) {
692 2     2   7 my $opthash = &__opthash;
693 2         6 my $new_args = to_hash(@_);
694              
695 2         3 my %saved;
696 2         6 foreach my $key (qw/verbose debug/) {
697 4 50       15 $saved{$key} = $opthash->{$key} if exists($opthash->{$key});
698 4         9 $opthash->{$key} = 0; # force off
699 4 50       10 $saved{$key} = delete($new_args->{$key}) if exists($new_args->{$key});
700             }
701              
702 2         13 my $self = Spreadsheet::Edit->new($opthash, %$new_args);
703              
704 2         13 delete @$$self{qw/verbose debug/};
705 2         7 $self->_set_verbose_debug_silent(%saved);
706              
707 2         6 $self
708             }
709              
710             #####################################################################
711             # Get "self" for a function/method combo sub:
712             # If the first arg is an object ref we shift it off and use that
713             # (i.e. assume it is called as a method); otherwise we assume it's a
714             # functional-API function call and use the caller's "current sheet"
715             # (if none exists, __self creates one but __selfmust throws).
716             #
717             # This must be used with special syntax like
718             # my $self = &__self;
719             # which re-uses @_ so we can shift @_ as seen by our caller.
720              
721             sub __self_ifexists {
722              
723             # If the first arg is an object ref, shift it off and return it;
724             # Otherwise, if the caller's "current sheet" exists, return that;
725             # otherwise return undef.
726              
727             (defined(blessed($_[0])) && $_[0]->isa(__PACKAGE__) && shift(@_))
728 1120 100 66 1120   8636 || $pkg2currsheet{__find_userpkg()};
      66        
729             }
730             sub __selfmust { # sheet must exist, otherwise throw
731 841 50   841   1473 &__self_ifexists || do{
732 0         0 my $pkg = caller(1);
733 0         0 croak __methname()," : No sheet is defined in $pkg\n"
734             };
735             }
736              
737             sub __self { # a new empty sheet is created if necessary
738 279 50   279   564 &__self_ifexists || do{
739             # Create a new empty sheet and make it the caller's "current sheet".
740 0         0 my %opts;
741 0         0 my ($frame, $args) = __usercall_info();
742              
743 0         0 my ($userpkg, $fn, $lno, $subname) = @{ __filter_frame($frame) };
  0         0  
744 0         0 $opts{data_source} = "(Created implicitly by '$subname' at ${fn}:$lno)";
745              
746 0         0 my $self = $pkg2currsheet{$userpkg} = __silent_new(\%opts);
747 0         0 $self
748             }
749             }
750              
751              
752             ## Helpers...
753              
754             sub __opthash {
755 35 100   35   131 ref($_[0]) eq "HASH" ? shift(@_) : {}
756             }
757             sub __selfmust_opthash {
758 8     8   25 my $self = &__selfmust;
759 8         24 my $opthash = &__opthash;
760 8         25 ($self, $opthash)
761             }
762             sub __self_opthash {
763 5     5   10 my $self = &__self;
764 5         17 my $opthash = &__opthash;
765 5         20 ($self, $opthash)
766             }
767             sub __selfonly {
768 274     274   509 my $self = &__self;
769 274 50       657 confess __methname, " expects no arguments!\n" if @_;
770 274         1225 $self
771             }
772             sub __selfmustonly {
773 553     553   974 my $self = &__selfmust;
774 553 50       1259 confess __methname, " expects no arguments!\n" if @_;
775 553         2181 $self
776             }
777              
778             sub __self_opthash_Nargs($@) { # (num_expected_args, @_)
779 2     2   3 my $Nargs = shift;
780 2         6 my ($self, $opthash) = &__self_opthash;
781             #croak
782 2 50       6 croak __methname, " expects $Nargs arguments, not ",scalar(@_),"\n"
783             if $Nargs != @_;
784 2         6 ($self, $opthash, @_)
785             }
786 0     0   0 sub __self_opthash_0args { unshift @_,0; goto &__self_opthash_Nargs }
  0         0  
787 2     2   7 sub __self_opthash_1arg { unshift @_,1; goto &__self_opthash_Nargs }
  2         7  
788 0     0   0 sub __self_opthash_2args { unshift @_,2; goto &__self_opthash_Nargs }
  0         0  
789 0     0   0 sub __self_opthash_3args { unshift @_,3; goto &__self_opthash_Nargs }
  0         0  
790              
791             # Check that an option hash has only valid keys, and values aren't undef
792             sub __validate_opthash($$;@) {
793 10     10   52 my ($opthash, $valid_keys, %opts) = @_;
794 10 50       28 return unless defined $opthash; # silently accept undef
795 10         33 foreach my $k (keys %$opthash) {
796             croak "Unrecognized ",($opts{desc}//"option")," '$k'"
797 1 50 0 1   9 unless first{$_ eq $k} @$valid_keys;
  1         4  
798             confess "Option '$k' may not be 'undef'"
799             if $opts{undef_ok_only} && !defined($opthash->{$k})
800 1 50 33     12 && !grep{$_ eq $k} @{$opts{undef_ok_only}};
  0   33     0  
  0         0  
801             }
802             $opthash
803 10         28 }
804              
805             # Copy verbose/debug/silent options into $self, deleting them from
806             # the provided options hash.
807             # RETURNS: Hash of original values to pass to _restore_stdopts()
808             #
809             # This is used by methods which accept {verbose} etc. options
810             # which override what is in the object for the duration of that method call.
811             sub _set_stdopts {
812 10     10   25 my ($self, $opthash) = @_;
813 10         22 my $previous = {};
814 10         29 foreach my $key (qw/verbose debug silent/) {
815 30 100       70 if (exists $opthash->{$key}) {
816 2         7 $previous->{$key} = $$self->{$key};
817 2         7 $$self->{$key} = delete($opthash->{$key});
818             }
819             }
820             $previous
821 10         25 }
822             sub _restore_stdopts {
823 10     10   18 my $self = shift;
824 10         18 my $saved = shift;
825 10         98 @$$self{keys %$saved} = values %$saved;
826             }
827              
828             sub _validate_ident($) {
829 0 0   0   0 croak "identifier is undef!" unless defined $_[0];
830 0 0       0 croak "identifier is empty" unless $_[0] ne "";
831 0 0       0 croak ivisq '"$_[0]" is not a valid identifier\n'
832             unless $_[0] eq title2ident($_[0]);
833 0         0 $_[0]
834             }
835              
836             # Check that an option hash has only valid keys
837             sub __validate_4pthash($$;$) {
838 0     0   0 my ($opthash, $valid_keys, $optdesc) = @_;
839 0 0       0 return unless defined $opthash; # silently accept undef
840 0         0 foreach my $k (keys %$opthash) {
841             croak "Unrecognized ",($optdesc//"option")," '$k'"
842 0 0 0 0   0 unless first{$_ eq $k} @$valid_keys;
  0         0  
843             }
844             $opthash
845 0         0 }
846              
847             sub __validate_nonnegi($;$) {
848 4 50 0 4   34 croak(($_[1]//"argument")." must be a non-negative integer",
      33        
849             " (not ".u($_[0]).")")
850             unless defined($_[0]) && "$_[0]" =~ /^\d+$/;
851 4         11 $_[0]
852             }
853             sub __validate_nonnegi_or_undef($;$) {
854 0 0 0 0   0 croak(($_[1]//"argument")." must be a non-negative integer or undef",
      0        
855             " (not ".u($_[0]).")")
856             unless !defined($_[0]) || "$_[0]" =~ /^\d+$/;
857 0         0 $_[0]
858             }
859              
860             sub __validate_pairs(@) {
861 4 50   4   18 unless ((scalar(@_) % 2) == 0) {
862 0 0       0 croak __methname," does not accept an {OPTIONS} hash here"
863             if (ref($_[0]) eq "HASH");
864 0         0 confess "In call to ",__methname,
865             " : uneven arg count, expecting key => value pairs"
866             }
867 4         33 foreach (pairs @_) {
868 1         5 my $key = $_->[0];
869 1 50       6 confess "In call to ",__methname," the key '$key' looks suspicious"
870             unless $key =~ /^\w+$/;
871             }
872             @_
873 4         22 }
874              
875             sub _check_rx {
876 0     0   0 my ($self, $rx, $one_past_end_ok) = @_;
877 0 0 0     0 confess __methname.": Illegal rx ",vis($rx),"\n"
878             unless ($rx//"") =~ /^\d+$/; # non-negative integer
879 0         0 my $maxrx = $#{$$self->{rows}};
  0         0  
880 0 0       0 confess __methname.": rx ".vis($rx)." is beyond the last row\n"
    0          
881             .dvis(' $$self')
882             if $rx > ($one_past_end_ok ? ($maxrx+1) : $maxrx);
883             }
884              
885             # Diagnose scalar context if there are no results.
886             sub __first_ifnot_wantarray(@) {
887 0     0   0 my $wantarray = (caller(1))[5];
888 0 0       0 return @_ if $wantarray;
889 0 0       0 return $_[0] if @_;
890 0 0       0 croak __methname, " called in scalar context but that method does not return a result.\n"
891             if defined($wantarray);
892             }
893             sub __validate_not_scalar_context(@) {
894 0     0   0 my $wantarray = (caller(1))[5];
895 0 0 0     0 croak __methname, " returns an array, not a scalar"
896             unless $wantarray || !defined($wantarray);
897             @_
898 0         0 }
899              
900             sub _carponce { # if not silent
901 0     0   0 my $self = shift;
902 0         0 my $msg = join "",@_;
903 0 0       0 return if $$self->{_carponce}->{$msg}++;
904 0 0       0 $msg .= "\n" unless $msg =~ /\n\z/s;
905             carp($msg)
906 0 0       0 unless $$self->{silent}; # never appears even if silent is later unset
907             }
908              
909             ###################### METHODS/FUNCTIONS #######################
910              
911             # Unlike other methods, new() takes key => value pair arguments.
912             # For consistency with other methods an initial {OPTIONS} hash is
913             # also allowed, but it is not special in any way and is merged
914             # with any linear args (linear args override {OPTIONS}).
915              
916             sub new { # Strictly OO, this does not affect caller's "current sheet".
917             # The corresponding functional API is new_sheet() which explicitly
918             # creates a new sheet and makes it the 'current sheet'.
919 4     4 1 73 my $classname = shift;
920 4 50 33     37 croak "Invalid/missing CLASSNAME (i.e. \"this\") arg"
921             unless defined($classname) && $classname =~ /^[\w_:]+$/;
922              
923 4         13 my $opthash = &__opthash;
924             # Special handling of {cmd_nesting) since there was no object to begin with:
925             # Internal callers may pass this as a "user" option in {OPTARGS};
926             # we won't log it, but we plant it into the object below.
927             # **THE CALLER MUST DECREMENT IT LATER IF NEEDED*
928 4   50     36 my $cmd_nesting = delete($opthash->{cmd_nesting}) // 0;
929              
930 4         21 my %opts = (verbose => $Verbose, debug => $Debug, silent => $Silent,
931             %$opthash,
932             __validate_pairs(@_));
933              
934 4         11 my $self;
935 4 50       15 if (my $clonee = delete $opts{clone}) { # untested as of 2/12/14
936 0         0 delete @opts{qw/verbose debug silent/};
937 0 0       0 croak "Other options not allowed with 'clone': ",hvis(%opts) if %opts;
938 0         0 require Clone;
939 0         0 $self = Clone::clone($clonee); # in all its glory
940             $$self->{data_source} = (delete $opts{data_source})
941 0   0     0 // "cloned from $$self->{data_source}";
942             } else {
943             my $hash = {
944             attributes => delete $opts{attributes} // {},
945             linenums => delete $opts{linenums} // [],
946             meta_info => delete $opts{meta_info} // [], ##### ???? obsolete ???
947             data_source => delete $opts{data_source}, # // "(none)",
948             num_cols => delete $opts{num_cols}, # possibly undef
949              
950             # %colx maps titles, aliases (automatic and user-defined), and
951             # spreadsheet column lettercodes to the corresponding column indicies.
952 4   50     70 colx => {},
      50        
      50        
953             colx_desc => {}, # for use in error messages
954             useraliases => {}, # key exists for user-defined alias names
955              
956             title_rx => undef,
957             current_rx => undef, # valid during apply()
958              
959             pkg2tiedvarnames => {},
960             pkg2tieall => {},
961             };
962              
963             # We can not use $hash directly as the object representation because %{}
964             # is overloaded, so we use a scalar ref (pointing to the hashref)
965             # as the object.
966 4         14 $self = bless \$hash, $classname;
967              
968             # Create a tied virtual array which creates Magicrows when assigned to.
969 4         6 my @rows; tie @rows, 'Spreadsheet::Edit::RowsTie', $self;
  4         67  
970 4         22 $hash->{rows} = \@rows;
971              
972 4 100       16 if (my $newdata = delete $opts{rows}) {
973 2         8 foreach (@$newdata) {
974 6         30 push @rows, $_;
975             }
976             }
977             }# not cloning
978              
979 4         54 $$self->{cmd_nesting} = $cmd_nesting;
980              
981 4         24 $self->_set_verbose_debug_silent(%opts); # croaks if other keys remain
982              
983             # Validate data, default num_cols, pad rows, etc.
984 4         38 $self->_rows_replaced();
985              
986             log_call [$opthash,@_], [\fmt_sheet($self)]
987 4 50       15 if $$self->{verbose};
988              
989 4         16 $self
990             }#new
991              
992             use overload
993             # As an ARRAYref, a sheet acts like \@rows which is (a ref to) a
994             # virtual array of Magicrow objects, each of which is a dual array/hash
995             # ref to cells in a given row (via RowsTie).
996 0     0   0 '@{}' => sub { my $hash = ${ shift() }; $hash->{rows}; },
  0         0  
  0         0  
997              
998             # As a HASHref, a sheet acts like \%crow which is (a ref to)
999             # the hash view of the current row during 'apply'
1000 0     0   0 '%{}' => sub { my $self = shift;
1001             # probably less efficient but avoids repeating code
1002 0         0 \%{ $self->crow() };
  0         0  
1003             },
1004             #'""' => sub { shift },
1005             #'0+' => sub { shift },
1006             #'==' => sub { my ($self, $other, $swap) = @_; $self == $other },
1007             #'eq' => sub { my ($self, $other, $swap) = @_; "$self" eq "$other" },
1008 3         34 fallback => 1,
1009 3     3   33 ;
  3         8  
1010              
1011             sub _rows_replaced { # completely new or replaced rows, linenums, etc.
1012 6     6   15 my ($self) = @_;
1013 6         17 my $hash = $$self;
1014              
1015             my ($rows, $linenums, $num_cols, $current_rx)
1016 6         20 = @$hash{qw/rows linenums num_cols current_rx/};
1017              
1018 6 50       17 croak "Can not replace sheet content during an apply!\n"
1019             if defined $current_rx;
1020 6         24 for my $rx (0..$#$rows) {
1021 12         35 my $row = $rows->[$rx];
1022 12 50       31 croak "rows must contain refs to arrays of cells (row $rx is $row)"
1023             unless __looks_like_aref($row);
1024 12         23 for my $cx (0..$#$row) {
1025 30 50       54 croak "New cell at Row ",$rx+1," Column ",cx2let($cx)," contains a ref"
1026             if ref($row->[$cx]);
1027             }
1028             }
1029             croak '\'linenums\' if present must be (a ref to) an array of numbers, ',
1030             ' "" or "???"', ivis('\nnot $linenums\n')
1031             unless ref($linenums) eq "ARRAY"
1032 6 50 33 6   96 && all{ defined() and looks_like_number($_) || !/[^\?]/ } @$linenums;
  6 50 33     26  
1033              
1034 6 100       36 if (@$rows) { # Determine num_cols and pad short rows
1035 4         9 my $nc = 0;
1036 4 100       12 foreach (@$rows) { $nc = @$_ if @$_ > $nc }
  12         36  
1037 4 50 33     13 if ($num_cols && $num_cols != $nc) {
1038 0         0 croak "num_cols=$num_cols was specified along with initial data, but\n",
1039             "the value doesn't match the data (which has up to $nc columns)\n"
1040             } else {
1041 4         11 $hash->{num_cols} = $num_cols = $nc;
1042             }
1043             # Pad short rows with empty fields
1044 4         11 foreach my $row (@$rows) {
1045 12         25 push @$row, (("") x ($num_cols - @$row));
1046             }
1047 4         10 $#$linenums = $#$rows;
1048 4   100     12 foreach (@$linenums) { $_ //= '???' };
  12         43  
1049             } else {
1050             # There is no data. Default num_cols to zero, but leave any
1051             # user-supplied value so a subsequent insert_rows() will know how
1052             # many columns to create.
1053 2   50     9 $hash->{num_cols} //= 0;
1054             }
1055             #oops unless $hash->{data_source};
1056 6 50       19 croak "#linenums ($#$linenums) != #rows ($#$rows)\n",
1057             dvis '$hash->{linenums}\n$hash->{rows}'
1058             unless @$linenums == @$rows;
1059              
1060 6         17 $hash->{title_rx} = undef;
1061 6         16 $hash->{useraliases} = {};
1062 6         49 $self->_rebuild_colx; # Set up colx colx_desc
1063 6         15 $self
1064             }#_rows_replaced
1065              
1066             #########################################################
1067             # Combination FUNCTION/METHOD
1068             # These are declared with signatures for use as functional-API FUNCTIONs
1069             # which use the caller's "current sheet" as the implicit object.
1070             #
1071             # However they may also be called as METHODs with an explicit object.
1072             #########################################################
1073              
1074             # Allow user to find out names of tied variables
1075             sub tied_varnames(;@) {
1076 0     0 0 0 my ($self, $opts) = &__selfmust_opthash;
1077 0   0     0 my $pkg = $opts->{package} // __callerpkg();
1078 0   0     0 my $h = $$self->{pkg2tiedvarnames}->{$pkg} //= {};
1079 0         0 return keys %$h;
1080             }
1081              
1082             # Internal: Tie specified variables into a package if not already tied.
1083             # Returns:
1084             sub __TCV_REDUNDANT() { 1 } # if all were already tied
1085             sub __TCV_OK() { 2 } # otherwise (some tied, or no idents specified)
1086             #
1087             sub _tie_col_vars {
1088 3     3   8 my $self = shift;
1089 3         5 my $pkg = shift;
1090 3         6 my $parms = shift;
1091             # Remaining arguments are idents
1092              
1093 3         9 my ($safe, $file, $lno) = @$parms;
1094 3 100       13 my @safecheck_pkgs = $pkg eq "main" ? ($pkg) : ($pkg, "main");
1095              
1096             my ($colx, $colx_desc, $debug, $silent)
1097 3         26 = @$$self{qw/colx colx_desc debug silent/};
1098              
1099             # FIXME: BUG/ISSUE ...
1100             # Why is it correct to keep tiedvarnames PER-SHEET ?
1101             # Isn't this a global property of each package?
1102              
1103 3   50     44 my $tiedvarnames = ($$self->{pkg2tiedvarnames}->{$pkg} //= {});
1104              
1105 3 50 33     31 if (@_ > 0 && %$tiedvarnames) {
1106             SHORTCUT: {
1107 0         0 foreach (@_) {
  0         0  
1108 0 0       0 last SHORTCUT unless exists $tiedvarnames->{$_};
1109             }
1110 0         0 return __TCV_REDUNDANT;
1111             }
1112             }
1113              
1114             VAR:
1115 3         12 foreach (sort {$a->[0] <=> $b->[0]} # sort for ease of debugging
  26         51  
1116             map {
1117 16         33 my $cx = $colx->{$_};
1118             defined($cx)
1119 16 50       61 ? [ $cx, $_, $colx_desc->{$_} ]
1120             : [ 999999, $_, "(currently NOT DEFINED)" ];
1121             } @_
1122             )
1123             {
1124 16         157 my ($cx, $ident, $desc) = @$_;
1125 16 50       70 oops unless $ident =~ /^\w+$/;
1126              
1127 16 50       40 if (exists $tiedvarnames->{$ident}) {
1128 0 0       0 $self->_log(" Previously tied: \$${pkg}::${ident}\n") if $debug;
1129             next
1130 0         0 }
1131              
1132 16 50       37 btw dvis '##ZZ $ident $safe ${^GLOBAL_PHASE} @safecheck_pkgs\n' if $debug;
1133              
1134 3     3   4522 no strict 'refs';
  3         10  
  3         436  
1135 16 100       39 if ($safe) {
1136 8 50       26 if (${^GLOBAL_PHASE} ne "START") {
1137 0 0       0 $self->_carponce("Not tieing new variables because :safe was used and this is not (any longer) during compile time\n") unless $silent;
1138 0         0 return __TCV_REDUNDANT; ### IMMEDIATE EXIT ###
1139             }
1140 8         13 foreach my $p (@safecheck_pkgs) {
1141             # Per 'man perlref' we can not use *foo{SCALAR} to detect a never-
1142             # declared SCALAR (it's indistinguishable from an existing undef var).
1143             # So we must insist that the entire glob does not exist.
1144 16 50       30 btw ivis ' Checking pkg $p ident $ident ...\n' if $debug;
1145 3     3   23 no strict 'refs';
  3         8  
  3         516  
1146 16 50       20 if (exists ${$p.'::'}{$ident}) {
  16         51  
1147 0         0 my $msg = <<EOF ;
1148             COLSPEC '$ident' clashes with an existing object in package '$p' .
1149             This is dis-allowed when tie_column_vars was called with option :safe,
1150             in this case at ${file}:${lno} . In this situation you can not
1151             explicitly declare the tied variables, and they must be tied and
1152             imported before the compiler sees them.
1153              
1154             Note: The clash might not be with a scalar \$$ident, but something else
1155             named '${ident}' in the same package (array, hash, sub, filehandle,
1156             etc.) Unfortunately it is not possible to distinguish a non-existent
1157             scalar from a declared scalar containing an undef value
1158             (see *foo{SCALAR} in 'man perlref'). Therefore, to be safe,
1159             nothing is allowed to pre-exist with the name '${ident}'.
1160             EOF
1161             # We can detect anything other than an undef scalar
1162 0         0 local $Data::Dumper::Maxdepth = 2;
1163 0         0 local $Data::Dumper::Interp::Maxdepth = 2;
1164 0         0 my $fqname = $p."::".$ident;
1165 3     3   24 no strict 'refs';
  3         6  
  3         34107  
1166 0 0       0 if (my $r = *$fqname{SCALAR}) {
1167 0 0       0 if (defined($$r)) {
1168 0         0 $msg .= "\nExisting *Defined* Scalar \$$ident = ".vis($$r)."\n";
1169             }
1170             }
1171 0 0       0 if (my $r = *$fqname{ARRAY}) {
1172 0         0 $msg .= "\nExisting Array \@$fqname = ".avis(@$r)."\n";
1173             }
1174 0 0       0 if (my $r = *$fqname{HASH}) {
1175 0         0 $msg .= "\nExisting Hash \%$fqname = ".hvis(%$r)."\n";
1176             }
1177 0 0       0 if (my $r = *$fqname{CODE}) {
1178 0         0 $msg .= "\nExisting $fqname = ".Data::Dumper->new([$r])->Terse(1)->Indent(0)->Deparse(1)->Dump()."\n";
1179             }
1180 0 0       0 if (my $r = *$fqname{IO}) {
1181 0         0 $msg .= "\nExisting IO object $fqname (file/dir handle, etc.)\n";
1182             }
1183 0 0       0 if (eval{ require Devel::Symdump; }) {
  0         0  
1184 0         0 my $obj = Devel::Symdump->new($p);
1185 0         0 foreach (qw/FILEHANDLE.s DIRHANDLE.s UNKNOWN.s PACKAGE.s/) {
1186 0 0       0 /^(.+)\.(.*)$/ or die; my $kind = lc $1; my $methname = lc $1.$2;
  0         0  
  0         0  
1187 0 0       0 if (grep{$_ eq $ident} $obj->$methname()) {
  0         0  
1188 0         0 $msg .= "\n Found existing $kind object $fqname\n";
1189             }
1190             }
1191             }
1192 0         0 croak "\n$msg\n";
1193             }
1194             }
1195             }
1196              
1197 16 50       32 $self->_log("tie \$${pkg}::${ident} to $desc\n") if $debug;
1198              
1199 16         31 $tiedvarnames->{$ident} = 1;
1200              
1201 16         22 *{"$pkg\::$ident"} = \${ *{gensym()} };
  16         234  
  16         20  
  16         42  
1202              
1203 16         35 tie ${"${pkg}::$ident"}, 'Tie::Indirect::Scalar',
  16         70  
1204             \&_tiecell_helper, $pkg, $ident;
1205             }
1206 3         35 return __TCV_OK;
1207             }
1208             sub _tiecell_helper {
1209 2     2   86 my($mutating, $pkg, $ident) = @_;
1210 2   33     15 my $sheet = $pkg2currsheet{$pkg}
1211             // croak "No sheet is currently valid for package $pkg\n";
1212 2         25 $sheet->_onlyinapply("tied variable \$$ident");
1213              
1214             # WRONG... it croaks bc sheet->{rows}->[rx] is a rowhash which doesn't like undef keys
1215             # WRONG: This returns \undef if $ident is not currently valid
1216 2         12 \( $$sheet->{rows}->[$$sheet->{current_rx}]->{$ident} )
1217             }
1218              
1219             sub _all_valid_idents {
1220 3     3   5 my $self = shift;
1221 3         6 my %valid_idents;
1222 3         5 foreach (keys %{ $$self->{colx} }) {
  3         18  
1223 18 50       57 if (/^(?:REGERROR|REGMARK|AUTOLOAD)$/) {
1224 0         0 $self->_carponce("WARNING: Column key ",visq($_)," conflicts with a Perl built-in; variable will not be tied.\n");
1225 0         0 next;
1226             }
1227 18         40 $valid_idents{ title2ident($_) } = 1;
1228             }
1229 3         24 return keys %valid_idents;
1230             }
1231              
1232             sub tie_column_vars(;@) {
1233 3     3 1 16 my ($self, $opts) = &__self_opthash;
1234             # Any remaining args specify variable names matching
1235             # alias names, either user-defined or automatic.
1236              
1237 3 50       11 croak "tie_column_vars without arguments (did you intend to use ':all'?)"
1238             unless @_;
1239              
1240 3   33     29 local $$self->{silent} = $opts->{silent} // $$self->{silent};
1241 3   33     21 local $$self->{verbose} = $opts->{verbose} // $$self->{verbose};
1242 3   33     16 local $$self->{debug} = $opts->{debug} // $$self->{debug};
1243              
1244 3   66     14 my $pkg = $opts->{package} // __callerpkg();
1245              
1246 3         6 my (%tokens, @varnames);
1247 3 50       9 foreach (@_) { if (/:/) { $tokens{$_} = 1 } else { push @varnames, $_ } }
  5         16  
  5         14  
  0         0  
1248 3         9 foreach (@varnames) {
1249 0 0       0 croak "Invalid variable name '$_'\n" unless /^\$?\w+$/;
1250 0         0 s/^\$//;
1251             }
1252              
1253             # With ':all' tie all possible variables, now and in the future.
1254             #
1255             # CURRENTLY UNDOCUMENTED: With the ":safe" token, a check is made
1256             # that variables do not already exist immediately before tying them;
1257             # otherwise an exception is thrown.
1258             #
1259             # When ':safe' is combined with ':all', variables will not be checked & tied
1260             # except during compile time, i.e. within BEGIN{...}. Therefore a
1261             # malicious spreadsheet can not cause an exception after the compilation
1262             # phase.
1263 3         8 my $safe = delete $tokens{':safe'};
1264 3         11 my ($file, $lno) = __fn_ln_methname();
1265 3         11 my $parms = [$safe, $file, $lno];
1266              
1267             # Why? Obsolete? Only for :all?? [note added Dec22]
1268             # FIXME TODO: is this bogus?
1269 3 50       13 $self->title_rx($opts->{title_rx}) if exists $opts->{title_rx};
1270              
1271 3 50       10 if (delete $tokens{':all'}) {
1272             # Remember parameters for tie operations which might occur later
1273 3         13 $$self->{pkg2tieall}->{$pkg} = $parms;
1274              
1275 3         15 push @varnames, sort $self->_all_valid_idents;
1276             }
1277 3 50       12 croak "Unrecognized token in arguments: ",avis(keys %tokens) if %tokens;
1278              
1279 3         13 my $r = $self->_tie_col_vars($pkg, $parms, @varnames);
1280              
1281 3 50       20 my $pfx = ($r == __TCV_REDUNDANT ? "[ALL REDUNDANT] " : "");
1282             log_methcall $self, [\$pfx, \__fmt_uqarraywithqw(keys %tokens, @varnames),
1283             \" in package $pkg"]
1284 3 50       92 if $$self->{verbose};
1285             }#tie_column_vars
1286              
1287             #
1288             # Accessors for misc. sheet data
1289             #
1290              
1291 274     274 1 440 sub attributes(@) { ${&__selfonly}->{attributes} }
  274         544  
1292 0     0 1 0 sub colx() { ${&__selfmustonly}->{colx} }
  0         0  
1293 0     0 1 0 sub colx_desc() { ${&__selfmustonly}->{colx_desc} }
  0         0  
1294             sub data_source(;$) {
1295 274     274 1 511 my $self = &__selfmust;
1296 274 50       673 if (@_ == 0) { # 'get' request
1297 274 50       650 log_methcall $self, [], [$$self->{data_source}] if $$self->{verbose};
1298             return $$self->{data_source}
1299 274         1074 }
1300 0 0       0 log_methcall $self, [@_] if $$self->{verbose};
1301 0 0       0 croak "Too many args" unless @_ == 1;
1302 0         0 $$self->{data_source} = $_[0];
1303 0         0 $self
1304             }
1305 0     0 1 0 sub linenums() { ${&__selfmustonly}->{linenums} }
  0         0  
1306 0     0 1 0 sub num_cols() { ${&__selfmustonly}->{num_cols} }
  0         0  
1307 161     161 1 239 sub rows() { ${&__selfmustonly}->{rows} }
  161         344  
1308 274     274 1 396 sub sheetname() { ${&__selfmustonly}->{sheetname} }
  274         492  
1309              
1310 0     0 0 0 sub iolayers() { ${&__selfmustonly}->{iolayers} }
  0         0  
1311 0     0 0 0 sub meta_info() {${&__selfmustonly}->{meta_info} }
  0         0  
1312             sub input_encoding() {
1313             # Emulate old API. We actually store input_iolayers instead now,
1314             # so as to include :crlf if necessary.
1315 0     0 0 0 my $self = &__selfmustonly;
1316 0         0 local $_;
1317             return undef unless
1318 0         0 exists(${$self}->{input_iolayers})
1319 0 0 0     0 && ${$self}->{input_iolayers} =~ /encoding\(([^()]*)\)/;
  0         0  
1320 0         0 return $1;
1321             }
1322              
1323             # See below for title_rx()
1324             sub title_row() {
1325 0     0 1 0 my $self = &__selfmustonly;
1326 0         0 my $title_rx = $$self->{title_rx};
1327 0 0       0 my $r = defined($title_rx) ? $$self->{rows}->[$title_rx] : undef;
1328 0 0       0 log_methcall $self, [], [$r] if $$self->{verbose};
1329 0         0 $r
1330             }
1331 118     118 1 194 sub rx() { ${ &__selfmustonly }->{current_rx} }
  118         239  
1332             sub crow() {
1333 0     0 1 0 my $self = &__selfmustonly;
1334 0         0 ${ $self->_onlyinapply("crow() method") }->{rows}->[$$self->{current_rx}]
  0         0  
1335             }
1336             sub linenum() {
1337 0     0 1 0 my $self = &__selfmustonly;
1338 0   0     0 my $current_rx = $$self->{current_rx} // return(undef);
1339 0         0 $$self->{linenums}->[$current_rx];
1340             }
1341             sub _getref {
1342 0     0   0 my ($self, $rx, $ident) = @_;
1343 0         0 my ($rows, $colx) = @$$self{qw/rows colx/};
1344 0 0 0     0 croak "get/set: rx $rx is out of range" if $rx < 0 || $rx > $#$rows;
1345 0         0 my $row = $$self->{rows}->[$rx];
1346 0         0 my $cx = $colx->{$ident};
1347 0 0 0     0 oops("Invalid cx ".vis($cx)) if ! defined($cx) || $cx < 0 || $cx > $#$row;
      0        
1348 0         0 \$row->[$cx];
1349             }
1350             # get/set a cell given by (rx,COLSPEC)
1351             sub get($$) {
1352 0     0 1 0 my $self = &__selfmust;
1353 0         0 my $ref = $self->_getref(@_);
1354 0         0 $$ref;
1355             }
1356             sub set($$$) {
1357 0     0 1 0 my $self = &__selfmust;
1358 0         0 my ($rx, $colspec, $newval) = @_;
1359 0         0 my $ref = $self->_getref($rx, $colspec);
1360 0         0 $$ref = $newval;
1361 0         0 $self
1362             }
1363              
1364             sub __validate_sheet_arg($) {
1365 12     12   22 my $sheet = shift;
1366 12 50 33     141 croak "Argument '${\u($sheet)}' is not a Spreadsheet::Edit sheet object"
  0   66     0  
1367             if defined($sheet) and
1368             !blessed($sheet) || !$sheet->isa("Spreadsheet::Edit");
1369 12         35 $sheet;
1370             }
1371              
1372             #---- logging ----------------------------------
1373              
1374             # Print segmented log messages:
1375             # Join args together, prefixing with "> " or ">> " etc.
1376             # unless the previous call did not end with newline.
1377             # Maintains internal state. A final call with an ending \n must occur.
1378             sub _log {
1379 0     0   0 my $self = shift;
1380 0         0 state $in_midst;
1381             print STDERR join "",
1382             ($in_midst ? "" : (">" x ($$self->{cmd_nesting}||1))),
1383 0 0 0     0 map{u} @_;
  0         0  
1384 0         0 $in_midst = ($_[$#_] !~ /\n\z/s);
1385             }
1386              
1387             my $trunclen = 200;
1388             sub fmt_sheet($) {
1389 0     0 0 0 my $sheet = shift;
1390 0 0       0 return "undef" unless defined($sheet);
1391             #oops unless ref($sheet) ne "" && $sheet->isa(__PACKAGE__);
1392 0         0 local $$sheet->{verbose} = 0;
1393 0   0     0 my $desc = $sheet->sheetname() || $sheet->data_source() || "no data_source";
1394 0 0       0 if (length($desc) > $trunclen) { $desc = substr($desc,($trunclen-3))."..." }
  0         0  
1395 0         0 my $r = addrvis($sheet)."($desc)";
1396 0         0 $r
1397             }
1398              
1399             sub _logmethifv { # args: just linearized INPUT items
1400 0 0   0   0 return unless ${$_[0]}->{verbose};
  0         0  
1401 0         0 my $self = shift;
1402 0         0 log_methcall $self, [@_];
1403             }
1404              
1405             sub _logmethretifv { # args: [INPUTS], [OUTPUTS]
1406 8 50   8   30 oops if @_ > 3; # (self, items, retvals)
1407 8 50       14 return unless ${$_[0]}->{verbose};
  8         33  
1408 0         0 goto &log_methcall
1409             }
1410             #-----------------------------------------------
1411              
1412             my $__FILE__ = __FILE__;
1413             my $__PACKAGE__ = __PACKAGE__;
1414              
1415             sub _call_usercode($$$) {
1416 6     6   15 my ($self, $code, $cxlist) = @_;
1417              
1418 6 50       19 if (@$cxlist) {
1419 0         0 my $row = $self->crow();
1420 0         0 for ($row->[$cxlist->[0]]) { # bind $_ to the first-specified column
1421 0         0 eval{ &$code(@$row[@$cxlist]) };
  0         0  
1422             }
1423             } else {
1424 6         17 eval{ $code->() };
  6         22  
1425             ##Simplify backtraces
1426             #@_ = ();
1427             #goto &$code;
1428             }
1429 6 50       1358 if ($@) { # filter out our internal stuff to make easier to read
1430 0         0 { local $_;
  0         0  
1431             $@ =~ s/^[^\n]* called \Kat $__FILE__ line .*?(?=\s*${__PACKAGE__}::\w*apply)/from [apply internals]/msg
1432 0 0       0 unless $$self->{debug};
1433             }
1434 0         0 die "$@\n";
1435             }
1436             }
1437              
1438             # Do apply, handling COLSPEC args.
1439             # If $rxlists or $rxfirst & $rxlast are undef, visit all rows.
1440             sub _apply_to_rows($$$;$$$) {
1441 6     6   27 my ($self, $code, $cxlist, $rxlist, $rxfirst, $rxlast) = @_;
1442 6         21 my $hash = $$self;
1443 6         30 my ($linenums,$rows,$num_cols,$cl) = @$hash{qw/linenums rows num_cols/};
1444              
1445 6 50       37 croak $self->_methmsg("Missing or incorrect {code} argument")
1446             unless ref($code) eq "CODE";
1447 6         28 foreach (@$cxlist) {
1448 0 0 0     0 if ($_ < 0 || $_ >= $num_cols) {
1449 0         0 croak $self->_methmsg("cx $_ is out of range")
1450             }
1451             }
1452              
1453             { # Temp save "current_rx" from an enclosing apply
1454 6         11 local $hash->{current_rx} = undef;
  6         22  
1455              
1456             # Temp update "current apply sheet" for logmsg()
1457 6         29 local $_inner_apply_sheet = $self;
1458              
1459 6 100       28 if (defined $rxlist) {
1460 5         25 foreach my $rx (@$rxlist) {
1461 5 50 33     45 croak "rx $rx is out of range"
1462             if $rx < 0 || $rx > $#$rows;
1463 5         25 $hash->{current_rx} = $rx;
1464 5         38 _call_usercode($self,$code,$cxlist);
1465             }
1466             } else {
1467             # Do not cache $#$rows so user can call insert_rows() or delete_rows()
1468 1   50     7 for (my $rx = $rxfirst // 0;
      33        
      66        
1469             $rx <= $#$rows && (!defined($rxlast) || $rx <= $rxlast);
1470             $rx++)
1471             {
1472 1         4 $hash->{current_rx} = $rx;
1473 1         5 _call_usercode($self,$code,$cxlist);
1474 1         6 $rx = $hash->{current_rx}; # might have been changed by delete_rows()
1475             }
1476             }
1477             }
1478              
1479             croak "After completing apply, an enclosing apply was resumed, but",
1480             " current_rx=",$hash->{current_rx}," now points beyond the last row!\n"
1481 6 50 33     59 if defined($hash->{current_rx}) && $hash->{current_rx} > $#$rows;
1482 6         28 $self
1483             }#_apply_to_rows
1484              
1485             # Rebuild %colx and %colx_desc, and tie any required new variables.
1486             #
1487             # User-defined column aliases must already be valid in %colx;
1488             # all other entries are deleted and re-created.
1489             #
1490             # Note: the special '^', '$' and numieric cx values (if in 0..num_cols)
1491             # are handled algorithmically in _specs2cxdesclist() before consulting %colx.
1492             #
1493             # When building %colx, conflicts are resolved using these priorities:
1494             #
1495             # User-defined aliases (ALWAYS valid)
1496             # Titles (if unique)
1497             # Trimmed titles (with leading & trailing spaces removed)
1498             # Automatic aliases
1499             # ABC letter-codes
1500             #
1501             # Warnings are issued once for each conflict.
1502             sub _rebuild_colx {
1503             my $self = shift;
1504             my $notie = $_[0]; # true during autodetect probing
1505              
1506             my ($silent, $colx, $colx_desc, $useraliases, $num_cols, $title_rx,
1507             $rows, $debug, $pkg2tieall)
1508             = @$$self{qw/silent colx colx_desc useraliases num_cols title_rx
1509             rows debug pkg2tieall/};
1510              
1511             # Save user-defined Aliases before wiping %colx
1512             my %useralias;
1513             foreach my $alias (keys %$useraliases) {
1514             my $cx = $colx->{$alias};
1515             # cx may be undef if referenced column was deleted and/or if
1516             # an alias was created with {optional => TRUE} with non-matching regex.
1517             #next if !defined($cx); # the referenced column was deleted
1518             $useralias{$alias} = [$cx, $colx_desc->{$alias}];
1519             }
1520              
1521             # Now re-generate
1522             %$colx = ();
1523             %$colx_desc = ();
1524              
1525             my sub __putback($$$;$) {
1526             my ($key, $cx, $desc, $nomasking) = @_;
1527             if (defined (my $ocx = $colx->{$key})) {
1528             if ($cx != $ocx) {
1529             oops if $nomasking; # _get_usable_titles should have screen out
1530             $self->_carponce("Warning: ", visq($key), " ($desc) is MASKED BY (",
1531             $colx_desc->{$key}, ")") unless $silent;
1532             }
1533             } else {
1534             oops if exists $colx->{$key};
1535             $colx->{$key} = $cx;
1536             $colx_desc->{$key} = $desc;
1537             }
1538             }
1539              
1540             # Put back the user aliases
1541             while (my ($alias,$aref) = each %useralias) {
1542             my ($cx, $desc) = @$aref;
1543             __putback($alias, $cx, $desc);
1544             }
1545              
1546             if (defined $title_rx) {
1547             # Add non-conflicting titles
1548             my ($normal_titles, $unindexed_titles) = $self->_get_usable_titles;
1549             foreach (@$normal_titles) {
1550             my ($title, $cx) = @$_;
1551             __putback($title, $cx, __fmt_cx($cx).": Title", 1); # nomasking==1
1552             }
1553             # Titles with leading & trailing spaces trimmed off
1554             foreach (@$normal_titles) {
1555             my ($title, $cx) = @$_;
1556             my $key = $title;
1557             $key =~ s/\A\s+//s; $key =~ s/\s+\z//s;
1558             if ($key ne $title) {
1559             __putback($key, $cx, __fmt_cx($cx).": Title sans lead/trailing spaces",1);
1560             }
1561             }
1562             # Automatic aliases
1563             # N.B. These come from all titles, not just "normal" ones
1564             foreach (@$normal_titles, @$unindexed_titles) {
1565             my ($title, $cx) = @$_;
1566             my $ident = title2ident($title);
1567             __putback($ident, $cx, __fmt_cx($cx).": Automatic alias for title");
1568             }
1569             }
1570             my %abc;
1571             foreach my $cx ( 0..$num_cols-1 ) {
1572             my $ABC = cx2let($cx);
1573             __putback($ABC, $cx, "cx $cx: Standard letter-code");
1574             }
1575              
1576             unless ($notie) {
1577             # export and tie newly-defined magic variables to packages which want that.
1578             if (my @pkglist = grep {defined $pkg2tieall->{$_}} keys %$pkg2tieall) {
1579             my @idents = $self->_all_valid_idents;
1580             foreach my $pkg (@pkglist) {
1581             $self->_tie_col_vars($pkg, $pkg2tieall->{$pkg}, @idents);
1582             }
1583             }
1584             }
1585             } # _rebuild_colx
1586              
1587             # Move and/or delete column positions. The argument is a ref to an array
1588             # containing the old column indicies of current (i.e. surviving) columns,
1589             # or undefs for new columns which did not exist previously.
1590             sub _adjust_colx {
1591 0     0   0 my ($self, $old_colxs) = @_;
1592             my ($colx, $colx_desc, $num_cols, $useraliases, $debug)
1593 0         0 = @$$self{qw/colx colx_desc num_cols useraliases debug/};
1594 0 0       0 oops unless @$old_colxs == $num_cols;
1595 0         0 my %old2new;
1596 0         0 foreach my $new_cx (0..$#$old_colxs) {
1597 0         0 my $old_cx = $old_colxs->[$new_cx];
1598 0 0       0 $old2new{$old_cx} = $new_cx if defined $old_cx;
1599             }
1600             # User-defined aliases are for arbitrary columns, so fix them manually
1601 0         0 foreach my $alias (keys %$useraliases) {
1602 0         0 my $cx = $colx->{$alias};
1603 0 0       0 next unless defined $cx; # e.g. non-unique title; see _rebuild_colx()
1604 0 0       0 if (defined (my $new_cx = $old2new{$cx})) {
1605 0 0       0 warn ">adjusting colx{$alias} : $colx->{$alias} -> $new_cx\n" if $debug;
1606 0         0 $colx->{$alias} = $new_cx;
1607             } else {
1608 0 0       0 warn ">deleting colx{$alias} (was $colx->{$alias})\n" if $debug;
1609 0         0 delete $colx->{$alias};
1610 0         0 delete $colx_desc->{$alias};
1611 0         0 delete $useraliases->{$alias};
1612             }
1613             }
1614             # Everything else is derived from actual titles
1615 0         0 $self->_rebuild_colx();
1616             }
1617              
1618             # Translate list of COLSPECs to a list of [cx,desc].
1619             # Regexes may match multiple columns.
1620             # THROWS if a spec does not indicate any existing column.
1621             sub _specs2cxdesclist {
1622 0     0   0 my $self = shift;
1623 0         0 my ($colx, $colx_desc, $num_cols) = @$$self{qw/colx colx_desc num_cols/};
1624 0         0 my @results;
1625 0         0 foreach my $spec (@_) {
1626 0 0       0 croak "Column specifier is undef!" unless defined $spec;
1627 0 0       0 if ($spec eq '^') {
1628 0         0 push @results, [0, "Special '^' specifier for first col"];
1629             next
1630 0         0 }
1631 0 0       0 if ($spec eq '$') {
1632 0         0 push @results, [$num_cols-1, "Special '\$' specifier for last col"];
1633             next
1634 0         0 }
1635 0 0 0     0 if (($spec =~ /^[1-9]\d*$/ || $spec eq "0")
      0        
1636             && $spec <= $num_cols) { # allow one-past-end
1637 0         0 push @results, [$spec, "Numeric column-index"];
1638             next
1639 0         0 }
1640 0 0       0 if (defined (my $cx = $colx->{$spec})) {
1641 0         0 push @results, [$cx, $colx_desc->{$spec}];
1642             next
1643 0         0 }
1644 0 0       0 if (ref($spec) eq 'Regexp') {
1645 0         0 my ($title_rx, $rows) = @$$self{qw/title_rx rows/};
1646 0 0       0 croak "Can not use regex: No title-row is defined!\n"
1647             unless defined $title_rx;
1648 0   0     0 my $title_row = $rows->[$title_rx] // oops;
1649 0         0 my $matched;
1650 0         0 for my $cx (0..$#$title_row) {
1651 0         0 my $title = $title_row->[$cx];
1652             # Note: We can't use /s here! The regex compiler has already
1653             # encapsulated /s or lack thereof in the compiled regex
1654 0 0       0 if ($title =~ /$spec/) {
1655 0         0 push @results, [$cx, "cx $cx: regex matched title '$title'"];
1656 0         0 $matched++;
1657             }
1658             }
1659 0 0       0 if (! $matched) {
1660 0         0 croak "\n--- Title Row (rx $title_rx) ---\n",
1661             vis($title_row),"\n-----------------\n",
1662             "Regex $spec\n",
1663             "does not match any of the titles (see above) in '",
1664             fmt_sheet($self),"'\n"
1665             # N.B. check for "does not match" in alias()
1666             }
1667             next
1668 0         0 }
1669 0         0 croak "Invalid column specifier '${spec}'\nnum_cols=$num_cols. Valid keys are:\n",
1670             $self->_fmt_colx;
1671             }
1672 0 0       0 oops unless wantarray;
1673             @results
1674 0         0 }#_specs2cxdesclist
1675             sub _spec2cx { # return $cx or ($cx, $desc); throws if spec is invalid
1676 0     0   0 my ($self, $spec) = @_;
1677 0         0 my @list = $self->_specs2cxdesclist($spec);
1678 0 0       0 if (@list > 1) {
1679             croak ivis("Regexpr $spec matches multiple titles:\n "),
1680 0         0 join("\n ",map{ vis $_->[1] } @list), "\n";
  0         0  
1681             }
1682 0         0 __first_ifnot_wantarray( @{$list[0]} ) # cx or (cx,desc)
  0         0  
1683             }
1684              
1685             sub _colspec2cx {
1686 0     0   0 my ($self, $colspec) = @_;
1687 0 0       0 croak "COLSPEC may not be a regex" if ref($colspec) eq 'Regexp';
1688 0         0 goto &_spec2cx
1689             }
1690              
1691             # The user-callable API
1692             # THROWS if a spec does not indicate any existing column.
1693             # Can return multiple results due to multple args and/or Regexp multimatch
1694             # In scalar context returns the first result.
1695             sub spectocx(@) { # the user-callable API
1696 0     0 1 0 my $self = &__selfmust;
1697 0         0 my @list = $self->_specs2cxdesclist(@_);
1698 0         0 __first_ifnot_wantarray( map{ $_->[0] } @list )
  0         0  
1699             }
1700              
1701             # Translate a possibly-relative column specification which
1702             # indicate 1 off the end.
1703             #
1704             # The specification may be
1705             # >something (the column after 'something')
1706             # or
1707             # an absolute column indicator (cx or ABC), possibly 1 off the end
1708             # or
1709             # refer to an existing column
1710             #
1711             sub _relspec2cx {
1712 0     0   0 my ($self, $spec) = @_;
1713 0         0 my $colx = $$self->{colx};
1714 0 0       0 if ($spec =~ /^>(.*)/) {
1715 0         0 my $cx = $self->_colspec2cx($1); # croaks if not an existing column
1716 0         0 return $cx + 1
1717             }
1718 0         0 $self->_colspec2cx($spec); # croaks if not an existing column
1719             }
1720              
1721             sub alias(@) {
1722 0     0 1 0 my $self = &__selfmust;
1723 0 0       0 my $opthash = ref($_[0]) eq 'HASH' ? shift() : {};
1724 0 0       0 if ($opthash) {
1725 0         0 __validate_opthash($opthash, [qw(optional)],
1726             desc => "alias option");
1727             }
1728 0 0       0 croak "'alias' expects an even number of arguments\n"
1729             unless scalar(@_ % 2)==0;
1730              
1731             my ($colx, $colx_desc, $num_cols, $useraliases, $rows, $silent, $debug)
1732 0         0 = @$$self{qw/colx colx_desc num_cols useraliases rows silent debug/};
1733              
1734 0         0 my ($file, $lno) = __fn_ln_methname();
1735              
1736 0         0 my @cxlist;
1737 0         0 while (@_) {
1738 0         0 my $ident = _validate_ident( shift @_ );
1739 0         0 my $spec = shift @_;
1740              
1741 0 0       0 if (my $wheredefined = $useraliases->{$ident}) {
1742 0         0 croak "'$ident' is already a user-defined alias",
1743             " for cx ", scalar($self->_spec2cx($ident)),
1744             " defined at ", $wheredefined, "\n"
1745             #" (for cx ", scalar($self->_spec2cx($ident)), ")"
1746             }
1747             croak "'$ident' is already a user-defined alias (for cx ",
1748             scalar($self->_spec2cx($ident)), ")"
1749 0 0       0 if $useraliases->{$ident};
1750              
1751 0         0 my $cx = eval{ $self->_spec2cx($spec) };
  0         0  
1752 0 0       0 unless(defined $cx) {
1753 0 0       0 oops unless $@;
1754 0 0 0     0 croak $@ unless $opthash->{optional} && $@ =~ /does not match/is;
1755             # Always throw on other errors, e.g. regex matches more than one title
1756             };
1757              
1758             # Log each pair individually
1759 0         0 $self->_logmethifv($opthash, \", $ident => ",\__fmt_colspec_cx($spec,$cx));
1760              
1761 0         0 $colx->{$ident} = $cx;
1762 0         0 $colx_desc->{$ident} = "alias for ".__fmt_cx($cx)." (".quotekey($spec).")";
1763 0         0 $useraliases->{$ident} = "${file}:$lno";
1764 0         0 push @cxlist, $cx;
1765             }
1766 0         0 $self->_rebuild_colx();
1767              
1768 0         0 __first_ifnot_wantarray( @cxlist )
1769             }#alias
1770              
1771             sub unalias(@) {
1772 0     0 1 0 my $self = &__selfmust;
1773 0 0       0 croak __methname, " does not accept an {OPTIONS} hash\n"
1774             if ref($_[0]) eq 'HASH';
1775              
1776             my ($colx, $colx_desc, $useraliases)
1777 0         0 = @$$self{qw/colx colx_desc useraliases/};
1778              
1779 0         0 foreach (@_) {
1780 0   0     0 delete $useraliases->{$_} // croak "unalias: '$_' is not a column alias\n";
1781 0         0 $self->_logmethifv(\" Removing alias $_ => ", \$colx_desc->{$_});
1782 0   0     0 delete $colx->{$_} // oops;
1783 0   0     0 delete $colx_desc->{$_} // oops;
1784             }
1785 0         0 $self->_rebuild_colx();
1786 0         0 $self
1787             }
1788              
1789             # title_rx: Get/set the title row index
1790             #
1791             # $rx = title_rx ; # Retrieve
1792             #
1793             # title_rx undef # Set to no titles
1794             #
1795             # title_rx ROWINDEX # Set to specified rx
1796             #
1797             # title_rx 'auto' # Auto-detect the title row; an exception is thrown
1798             # # if a plausible row is not found.
1799             #
1800             # {OPTARGS} may contain
1801             # verbose, silent, debug (temporarily override the object's settings)
1802             #
1803             # Auto-detect options:
1804             # required => [COLSPEC, ...] # required titles
1805             # min_rx, max_rx => NUM # range of rows which may contain the row
1806             # first_cx => NUM # first column ix which must contain a valid title
1807             # last_cx => NUM # last column ix which must contain a valid title
1808             #
1809             # Note: This is called internally by read_spreadsheet(), passing 'auto'
1810             # by default. Therefore users need not call this method explicitly
1811             # except to change title row or if read_spreadsheet was not used at all.
1812             #
1813             sub title_rx(;$@) {
1814 8     8 1 43 my ($self, $opthash_arg) = &__selfmust_opthash;
1815 8         25 my $opthash = { %$opthash_arg }; # make copy so we can modify it
1816 8         38 my @orig_args = @_;
1817              
1818 8         38 my $saved_stdopts = $self->_set_stdopts($opthash);
1819 8     8   80 scope_guard{ $self->_restore_stdopts($saved_stdopts) };
  8         31  
1820 8         63 __validate_opthash( $opthash,
1821             [qw(required min_rx max_rx first_cx last_cx)],
1822             desc => "autodetect option",
1823             undef_ok_only => [qw/verbose silent debug/] );
1824 8         23 my $rx = -999;
1825 8 100       30 if (@_ == 0) {
1826             # A return value was requested
1827 2 50       22 croak '{OPTARGS} passed to title_rx with no operator (get request?)'
1828             if %$opthash;
1829 2         25 $rx = $$self->{title_rx};
1830 2         39 $self->_logmethretifv([], [$rx]);
1831             } else {
1832             # N.B. undef arg means there are no titles
1833 6         12 $rx = shift;
1834 6 100       10 my $notie; $notie = shift() if u($_[0]) eq "_notie"; # during auto-detect probes
  6         29  
1835 6 50       57 croak "Extraneous argument(s) to title_rx: ".avis(@_) if @_;
1836              
1837 6 50       17 if (defined $rx) {
1838 6 100       43 if ($rx eq 'auto') {
    50          
    50          
1839 1         4 $rx = $self->_autodetect_title_rx($opthash);
1840             }
1841             elsif ($rx !~ /^\d+$/) {
1842 0         0 croak "Invalid title_rx argument: ", visq($rx);
1843             }
1844 5         25 elsif ($rx > $#{ $$self->{rows} }) {
1845 0         0 croak "Rx $rx is beyond the end of the data", visq($rx);
1846             }
1847             }
1848 6         61 $$self->{title_rx} = $rx;
1849 6         30 $self->_logmethretifv([$opthash_arg, @orig_args], [$rx]);
1850 6         36 $self->_rebuild_colx($notie);
1851             }
1852 8         33 $rx;
1853             }#title_rx
1854              
1855             sub _autodetect_title_rx {
1856 1     1   3 my ($self, $opthash) = @_;
1857              
1858             my ($title_rx, $rows, $colx, $num_cols, $verbose, $debug) =
1859 1         8 @$$self{qw(title_rx rows colx num_cols verbose debug)};
1860              
1861 1 50       4 if ($#$rows == -1) {
1862 0         0 return undef; # completely empty
1863             }
1864              
1865             # Filter out titles which can not be used as a COLSPEC
1866             my @required_specs = $opthash->{required}
1867 1 50       4 ? to_array($opthash->{required}) : ();
1868 1 50       4 croak "undef value in {required}" if grep{! defined} @required_specs;
  0         0  
1869 1         3 @required_specs = grep{ !__unindexed_title($_, $num_cols) } @required_specs;
  0         0  
1870              
1871 1   50     8 my $min_rx = __validate_nonnegi($opthash->{min_rx}//0, "min_rx");
1872 1   33     17 my $max_rx = __validate_nonnegi($opthash->{max_rx}//$min_rx+3, "max_rx");
1873 1 50       5 $max_rx = $#$rows if $max_rx > $#$rows;
1874              
1875 1   50     7 my $first_cx = __validate_nonnegi($opthash->{first_cx}//0, "first_cx");
1876 1   50     7 my $last_cx = __validate_nonnegi($opthash->{last_cx}//INT_MAX, "last_cx");
1877 1 50       4 $last_cx = $num_cols-1 if $last_cx >= $num_cols;
1878              
1879 1         3 my @nd_reasons;
1880 1 50       2 if ($min_rx > $#$rows) {
    50          
1881 0         0 push @nd_reasons, "min_rx ($min_rx) is out of range";
1882             }
1883             elsif ($min_rx > $max_rx) {
1884 0         0 push @nd_reasons,
1885             "min_rx ($min_rx) is greater than max_rx ($max_rx)"
1886             }
1887 1 50       6 if ($first_cx >= $num_cols) {
    50          
1888 0         0 push @nd_reasons, "first_cx ($first_cx) is out of range"
1889             }
1890             elsif ($first_cx > $last_cx) {
1891 0         0 push @nd_reasons,
1892             "first_cx ($first_cx) is less than last_cx ($last_cx)"
1893             }
1894              
1895 1         3 my $detected;
1896 1 50       3 unless (@nd_reasons) {
1897             # no logging during trial and error (except with debug)
1898 1         5 local $$self->{verbose} = $debug;
1899 1         12 local $$self->{silent} = !$debug;
1900 1         5 RX: for my $rx ($min_rx .. $max_rx) {
1901 1 50 33     11 warn "# ",$nd_reasons[-1],"\n" if $debug && @nd_reasons;
1902 1 50       3 warn ivis '#autodetect: Trying RX $rx ...\n' if $debug;
1903              
1904             # Make $rx the title_rx so __specs2cxdesclist() can be used
1905             # e.g. to handle regex COLSPECS. Pass special option to not tie
1906             # user variables yet.
1907 1         7 $self->title_rx($rx, "_notie");
1908 1 50       25 oops unless $rx == $$self->{title_rx};
1909              
1910 1         6 my $row = $rows->[$rx];
1911 1         6 for my $cx ($first_cx .. $last_cx) {
1912 2 50       5 if ($row->[$cx] eq "") {
1913 0         0 push @nd_reasons, "rx $rx: col ".__fmt_cx($cx)." is empty";
1914 0         0 next RX;
1915             }
1916             }
1917 1         6 foreach my $spec (@required_specs) {
1918 0         0 my @list; # A regex might match multiple titles
1919 0         0 eval { @list = $self->_specs2cxdesclist($spec) };
  0         0  
1920 0 0       0 warn ivis ' found $spec in @list\n' if $debug;
1921 0 0       0 if (@list == 0) {
1922 0         0 push @nd_reasons, ivis 'rx $rx: Required column \'$spec\' not found';
1923             next RX
1924 0         0 }
1925 0 0       0 my @shortlist = grep{ $_->[0] >= $first_cx && $_->[0] <= $last_cx }
  0         0  
1926             @list;
1927 0 0       0 if (@shortlist == 0) {
1928 0         0 push @nd_reasons, ivis 'rx $rx: Matched \'$spec\' but in unacceptable cx '.avisl(map{$_->[0]} @list);
  0         0  
1929             next RX
1930 0         0 }
1931 0 0       0 if (! grep{ $_->[1] =~ /title/i } @shortlist) {
  0         0  
1932             ### ??? Can this actually happen ???
1933 0         0 push @nd_reasons, ivis 'rx $rx: \'$spec\' resolved to something other than a title: '.__fmt_pairs(@shortlist);
1934 0         0 next RX;
1935             }
1936 0 0       0 warn ivis ' <<cx is within $first_cx .. $last_cx>>\n' if $debug;
1937             }
1938 1         3 $detected = $rx;
1939             last
1940 1         2 }
1941 1         5 $$self->{title_rx} = undef; # will re-do below
1942             }
1943 1 50       11 if (defined $detected) {
1944 1 50       6 if ($verbose) { # should be $debug ??
1945 0         0 my ($fn, $lno, $methname) = __fn_ln_methname();
1946 0         0 print STDERR "[Auto-detected title_rx = $detected at ${fn}:$lno]\n";
1947             }
1948 1         5 local $$self->{cmd_nesting} = $$self->{cmd_nesting} + 1;
1949 1         4 local $$self->{verbose} = 0; # suppress normal logging
1950 1         5 $self->title_rx($detected); # shows collision warnings unless {silent}
1951 1         6 return $detected;
1952             } else {
1953 0 0       0 if (@nd_reasons == 0) {
1954 0         0 push @nd_reasons, ivis '(BUG?) No rows checked! num_cols=$num_cols rows=$$self->{rows}'.dvis '\n##($min_rx $max_rx $first_cx $last_cx)' ;
1955             }
1956 0         0 croak("In ",qsh(fmt_sheet($self))," ...\n",
1957             " Auto-detect of title_rx with options ",vis($opthash),
1958             dvis ' @required_specs\n',
1959             " failed because:\n ", join("\n ",@nd_reasons),
1960             "\n"
1961             );
1962             }
1963             }
1964              
1965             # move_cols ">COLSPEC",source cols...
1966             # move_cols "absolute-position",source cols...
1967             sub move_cols($@) {
1968 0     0 1 0 my $self = &__selfmust;
1969 0         0 my ($posn, @sources) = @_;
1970              
1971 0         0 my ($num_cols, $rows) = @$$self{qw/num_cols rows/};
1972              
1973 0         0 my $to_cx = $self->_relspec2cx($posn);
1974              
1975 0         0 my @source_cxs = map { scalar $self->_spec2cx($_) } @sources;
  0         0  
1976 0         0 my @source_cxs_before = grep { $_ < $to_cx } @source_cxs;
  0         0  
1977 0         0 my $insert_offset = $to_cx - scalar(@source_cxs_before);
1978 0         0 my @rsorted_source_cxs = sort { $b <=> $a } @source_cxs;
  0         0  
1979              
1980             $self->_logmethifv(\__fmt_colspec_cx($posn,$to_cx), \" <-- ",
1981 0         0 \join(" ",map{"$source_cxs[$_]\[$_\]"} 0..$#source_cxs));
  0         0  
1982              
1983 0 0       0 croak "move destination is too far to the right\n"
1984             if $to_cx + @sources - @source_cxs_before > $num_cols;
1985              
1986 0         0 my @old_cxs = (0..$num_cols-1);
1987              
1988 0         0 foreach my $row (@$rows, \@old_cxs) {
1989 0         0 my @moving_cells = @$row[@source_cxs]; # save
1990 0         0 splice @$row, $_, 1 foreach (@rsorted_source_cxs); # delete
1991 0         0 splice @$row, $insert_offset, 0, @moving_cells; # put back
1992             };
1993              
1994 0         0 $self->_adjust_colx(\@old_cxs);
1995 0         0 $self
1996             }
1997 0     0 1 0 sub move_col($$) { goto &move_cols; }
1998              
1999             # insert_cols ">COLSPEC",new titles (or ""s or undefs if no title row)
2000             # insert_cols "absolute-position",...
2001             # RETURNS: The new colum indicies, or in scalar context the first cx
2002             sub insert_cols($@) {
2003 0     0 1 0 my $self = &__selfmust;
2004 0         0 my ($posn, @new_titles) = @_;
2005             my ($num_cols, $colx, $rows, $title_rx)
2006 0         0 = @$$self{qw/num_cols colx rows title_rx/};
2007              
2008 0         0 my $to_cx = $self->_relspec2cx($posn);
2009              
2010 0         0 $self->_logmethifv(\__fmt_colspec_cx($posn,$to_cx), \" <-- ", \avis(@new_titles));
2011              
2012 0   0     0 @new_titles = map { $_ // "" } @new_titles; # change undef to ""
  0         0  
2013 0 0   0   0 if (first { $_ ne "" } @new_titles) {
  0         0  
2014 0 0       0 croak "insert_cols: Can not specify titles unless title_rx is defined\n"
2015             unless defined $title_rx;
2016 0         0 my $title_row = $rows->[$title_rx];
2017 0         0 my %seen;
2018 0         0 foreach my $ntitle (@new_titles) {
2019 0 0       0 croak "New title '$ntitle' specified more than once!" if $seen{$ntitle}++;
2020 0         0 my $ex_rx = $colx->{$ntitle}; # for now can't clash with ABC codes
2021 0 0       0 croak "insert_cols: New title '$ntitle' clashes with rx $ex_rx"
2022             if defined $ex_rx;
2023             }
2024             } else {
2025             # NO. Allow new columns without a title
2026             #croak "You must specify non-undef titles if title_rx is defined\n"
2027             # if defined $title_rx;
2028             }
2029 0         0 my $num_insert_cols = @new_titles;
2030              
2031 0         0 foreach my $row (@$rows) {
2032 0 0 0     0 if (defined $title_rx && $row == $rows->[$title_rx]) {
2033 0         0 splice @$row, $to_cx, 0, @new_titles;
2034             } else {
2035 0         0 splice @$row, $to_cx, 0, (("") x $num_insert_cols);
2036             }
2037             }
2038 0         0 $$self->{num_cols} += $num_insert_cols;
2039              
2040 0         0 $self->_adjust_colx(
2041             [ 0..$to_cx-1, ((undef) x $num_insert_cols), $to_cx..$num_cols-1 ]
2042             ); #calls _rebuild_colx();
2043              
2044 0         0 __first_ifnot_wantarray( $to_cx .. $to_cx+$num_insert_cols-1 )
2045             }
2046 0     0 1 0 sub insert_col($$) { goto &insert_cols }
2047              
2048             # sort_rows {compare function}
2049             # sort_rows {compare function} $first_rx, $last_rx
2050             sub sort_rows(&) {
2051 0     0 1 0 my $self = &__selfmust;
2052 0 0       0 croak "bad args" unless @_ == 1;
2053 0         0 my ($cmpfunc, $first_rx, $last_rx) = @_;
2054              
2055 0         0 my ($rows, $linenums, $title_rx) = @$$self{qw/rows linenums title_rx/};
2056              
2057 0 0 0     0 $first_rx //= (defined($title_rx) ? $title_rx+1 : 0);
2058 0   0     0 $last_rx //= $#$rows;
2059              
2060 0         0 $self->_logmethifv(\"(sorting rx ${first_rx}..${last_rx})");
2061              
2062 0 0       0 oops unless defined($first_rx);
2063 0 0       0 oops unless defined($last_rx);
2064 0         0 my $pkg = caller;
2065             my @indicies = sort {
2066 0         0 my @row_indicies = ($a, $b);
  0         0  
2067 3     3   32 no strict 'refs';
  3         9  
  3         22014  
2068 0         0 local ${ "$pkg\::a" } = $rows->[$a]; # actual row objects
  0         0  
2069 0         0 local ${ "$pkg\::b" } = $rows->[$b];
  0         0  
2070 0         0 $cmpfunc->(@row_indicies)
2071             } ($first_rx..$last_rx);
2072              
2073 0         0 @$rows[$first_rx..$#$rows] = @$rows[@indicies];
2074 0         0 @$linenums[$first_rx..$#$rows] = @$linenums[@indicies];
2075              
2076 0         0 __validate_not_scalar_context(0..$first_rx-1, @indicies, $last_rx+1..$#$rows)
2077             }
2078              
2079             sub delete_cols(@) {
2080 0     0 1 0 my $self = &__selfmust;
2081 0         0 my (@cols) = @_;
2082 0         0 my ($num_cols, $rows) = @$$self{qw/num_cols rows/};
2083              
2084 0         0 my @cxlist = $self->_colspecs_to_cxs_ckunique(\@cols);
2085              
2086 0         0 my @reverse_cxs = sort { $b <=> $a } @cxlist;
  0         0  
2087              
2088 0         0 $self->_logmethifv(reverse @reverse_cxs);
2089 0         0 my @old_cxs = (0..$num_cols-1);
2090 0         0 for my $row (@$rows, \@old_cxs) {
2091 0         0 foreach my $cx (@reverse_cxs) {
2092 0 0       0 oops if $cx > $#$row;
2093 0         0 splice @$row, $cx, 1, ();
2094             }
2095             }
2096 0         0 $$self->{num_cols} -= @reverse_cxs;
2097 0         0 $self->_adjust_colx(\@old_cxs);
2098 0         0 $self
2099             }
2100 0     0 1 0 sub delete_col($) { goto &delete_cols; }
2101              
2102             # Logic which forces verbose on when debug is on, etc.
2103             # Used by new() and options()
2104             sub _set_verbose_debug_silent(@) {
2105 6     6   12 my $self = shift;
2106 6         41 foreach (pairs @_) {
2107 12         32 my ($key, $val) = @$_;
2108 12         24 my $oldval = $$self->{$key};
2109             next
2110 12 50       44 unless !!$oldval != !!$val;
2111 0 0       0 if ($key eq "silent") {
    0          
    0          
2112 0         0 $$self->{$key} = $val;
2113             }
2114             elsif ($key eq "verbose") {
2115 0 0       0 if ($val) {
2116 0         0 $$self->{saved_silent} = $$self->{silent};
2117 0         0 $$self->{silent} = 0; #?? might still want to suppress warnings
2118             } else {
2119 0         0 $$self->{silent} = delete $$self->{saved_silent};
2120             }
2121             }
2122             elsif ($key eq "debug") {
2123 0 0       0 if ($val) {
2124 0         0 $$self->{saved_verbose} = $$self->{verbose};
2125 0         0 $$self->{saved_silent} = $$self->{silent};
2126 0         0 $$self->{silent} = 0;
2127 0         0 $$self->{verbose} = "forced by {debug}";
2128             } else {
2129 0         0 $$self->{verbose} = delete $$self->{saved_verbose};
2130 0         0 $$self->{silent} = delete $$self->{saved_silent};
2131             }
2132             }
2133 0         0 else { confess "options: Unknown option key '$key'\n"; }
2134 0         0 $$self->{$key} = $val;
2135             }
2136             }
2137              
2138             # Get or set option(s).
2139             # New settings may be in an {OPTIONS} hash and/or linear args.
2140             # Always returns the old options (key => value pairs).
2141             sub options(@) {
2142 0     0 1 0 my $self = &__self; # auto-create sheet if necessary
2143 0 0       0 my @old = map{ exists($$self->{$_}) ? ($_ => $$self->{$_}) : () }
  0         0  
2144             qw/verbose debug silent/;
2145              
2146 0         0 my %eff_args;
2147 0 0       0 if (@_ == 0) {
2148 0 0       0 croak "(list) returned but called in scalar or void context"
2149             unless wantarray;
2150             } else {
2151 0         0 my $opthash = &__opthash; # shift off 1st arg iff it is a hashref
2152 0         0 %eff_args = (%$opthash, &__validate_pairs);
2153 0         0 $self->_set_verbose_debug_silent(%eff_args);
2154             }
2155 0         0 $self->_logmethretifv([\__fmt_pairlist(%eff_args)], [\hvis(@old)]);
2156 0         0 @old;
2157             }
2158              
2159             sub _colspecs_to_cxs_ckunique {
2160 0 0   0   0 my ($self, $colspecs) = @_; oops unless @_==2;
  0         0  
2161 0         0 my @cxlist;
2162             my %seen;
2163 0         0 foreach (@$colspecs) {
2164 0         0 my $cx = $self->_spec2cx($_); # auto-detects title_rx if needed
2165 0 0       0 if ($seen{$cx}) {
2166 0         0 croak "cx $cx is specified by multiple COLSPECs: ", vis($_)," and ",vis($seen{$cx}),"\n";
2167             }
2168 0         0 $seen{ $cx } = $_;
2169 0         0 push @cxlist, $cx;
2170             }
2171             @cxlist
2172 0         0 }
2173              
2174             sub only_cols(@) {
2175 0     0 1 0 my $self = &__selfmust;
2176 0         0 my @cols = @_;
2177 0         0 my $rows = $self->rows;
2178              
2179             # Replace each row with just the surviving columns, in the order specified
2180 0         0 my @cxlist = $self->_colspecs_to_cxs_ckunique(\@cols);
2181 0         0 for my $row (@$rows) {
2182 0         0 @$row = map{ $row->[$_] } @cxlist;
  0         0  
2183             }
2184 0         0 $$self->{num_cols} = scalar(@cxlist);
2185 0         0 $self->_adjust_colx(\@cxlist);
2186 0         0 $self
2187             }
2188              
2189             # obj->join_cols separator_or_coderef, colspecs...
2190             # If coderef:
2191             # $_ is bound to the first-named column, and is the destination
2192             # @_ is bound to all named columns, in the order named.
2193             sub join_cols(&@) {
2194 0     0 1 0 my $self = &__selfmust;
2195 0         0 my ($separator, @sources) = @_;
2196 0         0 my $hash = $$self;
2197              
2198 0         0 my ($num_cols, $rows) = @$hash{qw/num_cols rows/};
2199              
2200 0         0 my @source_cxs = map { scalar $self->_spec2cx($_) } @sources;
  0         0  
2201             $self->_logmethifv(\"'$separator' ",
2202 0         0 \join(" ",map{"$source_cxs[$_]\[$_\]"} 0..$#source_cxs));
  0         0  
2203              
2204 0         0 my $saved_v = $hash->{verbose}; $hash->{verbose} = 0;
  0         0  
2205              
2206             # Merge the content into the first column. N.B. EXCLUDES title row.
2207             my $code = ref($separator) eq 'CODE'
2208             ? $separator
2209 0 0   0   0 : sub{ $_ = join $separator, @_ } ;
  0         0  
2210              
2211 0   0     0 { my $first_rx = ($hash->{title_rx} // -1)+1;
  0         0  
2212 0         0 _apply_to_rows($self, $code, \@source_cxs, undef, $first_rx, undef);
2213             }
2214              
2215             # Delete the other columns
2216 0         0 $self->delete_cols(@source_cxs[1..$#source_cxs]);
2217              
2218 0         0 $$self->{verbose} = $saved_v;
2219 0         0 $self
2220             }
2221 0     0 1 0 sub join_cols_sep($@) { goto &join_cols } # to match the functional API
2222              
2223             sub rename_cols(@) {
2224 0     0 1 0 my $self = &__selfmust;
2225 0 0       0 croak "rename_cols expects an even number of arguments\n"
2226             unless scalar(@_ % 2)==0;
2227              
2228 0         0 my ($num_cols, $rows, $title_rx) = @$$self{qw/num_cols rows title_rx/};
2229              
2230 0 0       0 croak "rename_cols: No title_rx is defined!\n"
2231             unless defined $title_rx;
2232              
2233 0         0 my $title_row = $rows->[$title_rx];
2234              
2235 0         0 while (@_) {
2236 0         0 my $old_title = shift @_;
2237 0         0 my $new_title = shift @_;
2238 0         0 my $cx = $self->_spec2cx($old_title);
2239 0         0 $self->_logmethifv($old_title, \" -> ", $new_title, \" [cx $cx]");
2240 0 0       0 croak "rename_cols: Column $old_title is too large\n"
2241             if $cx > $#$title_row; # it must have been an absolute form
2242 0         0 $title_row->[$cx] = $new_title;
2243              
2244             # N.B. aliases remain pointing to the same columns regardless of names
2245             }
2246 0         0 $self->_rebuild_colx();
2247 0         0 $self
2248             }
2249              
2250             # apply {code}, colspec*
2251             # @_ are bound to the columns in the order specified (if any)
2252             # $_ is bound to the first such column
2253             # Only visit rows following the title row (if defined).
2254             sub apply(&;@) {
2255 1     1 1 6 my $self = &__selfmust;
2256 1         4 my ($code, @cols) = @_;
2257 1         4 my $hash = $$self;
2258 1         9 my @cxs = map { scalar $self->_spec2cx($_) } @cols;
  0         0  
2259              
2260 1   50     12 my $first_rx = ($hash->{title_rx} // -1) + 1;
2261              
2262 1         8 @_ = ($self, $code, \@cxs, undef, $first_rx, $#{$hash->{rows}});
  1         7  
2263 1         10 goto &_apply_to_rows
2264             }
2265              
2266             # apply_all {code}, colspec*
2267             # Like apply, but ALL rows are visited, inluding the title row if any
2268             sub apply_all(&;@) {
2269 0     0 1 0 my $self = &__selfmust;
2270 0         0 my ($code, @cols) = @_;
2271 0         0 my $hash = $$self;
2272 0         0 my @cxs = map { scalar $self->_spec2cx($_) } @cols;
  0         0  
2273 0         0 log_methcall $self, [\"rx 0..",$#{$hash->{rows}},
2274             @cxs > 0 ? \(" cxs=".avis(@cxs)) : ()]
2275 0 0       0 if $$self->{verbose};
    0          
2276 0         0 @_ = ($self, $code, \@cxs);
2277 0         0 goto &_apply_to_rows
2278             }
2279              
2280             sub __arrify_checknotempty($) {
2281 5     5   26 local $_ = shift;
2282 5 100       39 my $result = ref($_) eq 'ARRAY' ? $_ : [ $_ ];
2283             croak "Invalid argument ",vis($_)," (expecting [array ref] or single value)\n"
2284 5 50 33     46 unless @$result > 0 && !grep{ref($_) || $_ eq ""} @$result;
  5 50       63  
2285 5         21 $result
2286             }
2287              
2288             # apply_torx {code} rx, colspec*
2289             # apply_torx {code} [rx list], colspec*
2290             # Only the specified row(s) are visited
2291             sub apply_torx(&$;@) {
2292 5     5 1 519 my $self = &__selfmust;
2293 5         22 my ($code, $rxlist_arg, @cols) = @_;
2294 5 50       36 croak "Missing rx (or [list of rx]) argument\n" unless defined $rxlist_arg;
2295 5         40 my $rxlist = __arrify_checknotempty($rxlist_arg);
2296 5         14 my @cxs = map { scalar $self->_spec2cx($_) } @cols;
  0         0  
2297             log_methcall $self, [\vis($rxlist_arg),
2298             @cxs > 0 ? \(" cxs=".avis(@cxs)) : ()]
2299 5 0       26 if $$self->{verbose};
    50          
2300 5         28 @_ = ($self, $code, \@cxs, $rxlist);
2301 5         67 goto &_apply_to_rows
2302             }
2303              
2304             # apply_exceptrx {code} [rx list], colspec*
2305             # All rows EXCEPT the specified rows are visited
2306             sub apply_exceptrx(&$;@) {
2307 0     0 1 0 my $self = &__selfmust;
2308 0         0 my ($code, $exrxlist_arg, @cols) = @_;
2309 0 0       0 croak "Missing rx (or [list of rx]) argument\n" unless defined $exrxlist_arg;
2310 0         0 my $exrxlist = __arrify_checknotempty($exrxlist_arg);
2311 0         0 my @cxs = map { scalar $self->_spec2cx($_) } @cols;
  0         0  
2312             log_methcall $self, [\vis($exrxlist_arg),
2313             @cxs > 0 ? \(" cxs=".avis(@cxs)) : ()]
2314 0 0       0 if $$self->{verbose};
    0          
2315 0         0 my $hash = $$self;
2316 0         0 my $max_rx = $#{ $hash->{rows} };
  0         0  
2317 0         0 foreach (@$exrxlist) {
2318 0 0 0     0 croak "rx $_ is out of range\n" if $_ < 0 || $_ > $max_rx;
2319             }
2320 0         0 my %exrxlist = map{ $_ => 1 } @$exrxlist;
  0         0  
2321 0         0 my $rxlist = [ grep{ ! exists $exrxlist{$_} } 0..$max_rx ];
  0         0  
2322 0         0 @_ = ($self, $code, \@cxs, $rxlist);
2323 0         0 goto &_apply_to_rows
2324             }
2325              
2326             # split_col {code} oldcol, newcol_start_position, new titles...
2327             # {code} is called for each row with $_ bound to <oldcol>
2328             # and @_ bound to the new column(s).
2329             # The old column is left as-is (not deleted).
2330             sub split_col(&$$$@) {
2331 0     0 1 0 my $self = &__selfmust;
2332 0         0 my ($code, $oldcol_posn, $newcols_posn, @new_titles) = @_;
2333              
2334 0         0 my $num_insert_cols = @new_titles;
2335 0         0 my $old_cx = $self->_spec2cx($oldcol_posn);
2336 0         0 my $newcols_first_cx = $self->_relspec2cx($newcols_posn);
2337              
2338             log_methcall $self, [\"... $oldcol_posn\[$old_cx] -> [$newcols_first_cx]",
2339             avis(@new_titles)]
2340 0 0       0 if $$self->{verbose};
2341              
2342 0         0 local $$self->{verbose} = 0;
2343              
2344 0         0 $self->insert_cols($newcols_first_cx, @new_titles);
2345              
2346 0 0       0 $old_cx += $num_insert_cols if $old_cx >= $newcols_first_cx;
2347              
2348 0         0 $self->apply($code,
2349             $old_cx, $newcols_first_cx..$newcols_first_cx+$num_insert_cols-1);
2350              
2351 0         0 $self
2352             }
2353              
2354             sub reverse_cols() {
2355 0     0 1 0 my $self = &__selfmust;
2356 0         0 my ($rows, $num_cols) = @$$self{qw/rows num_cols/};
2357 0 0       0 log_methcall $self, [] if $$self->{verbose};
2358 0         0 for my $row (@$rows) {
2359 0         0 @$row = reverse @$row;
2360             }
2361 0         0 $self->_adjust_colx([reverse 0..$num_cols-1]);
2362 0         0 $self
2363             }
2364              
2365             sub transpose() {
2366 0     0 1 0 my $self = &__selfmust;
2367 0 0       0 log_methcall $self, [] if $$self->{verbose};
2368              
2369 0         0 my ($rows, $old_num_cols, $linenums) = @$$self{qw/rows num_cols linenums/};
2370              
2371 0         0 $$self->{useraliases} = {};
2372 0         0 $$self->{title_rx} = undef;
2373              
2374             # Save a copy of the data
2375 0         0 my @old_rows = ( map{ [ @$_ ] } @$rows );
  0         0  
2376              
2377             # Rebuild the spreadsheet
2378 0         0 @$rows = ();
2379 0         0 $$self->{num_cols} = scalar @old_rows;
2380              
2381 0         0 for (my $ocx=0; $ocx < $old_num_cols; ++$ocx) {
2382 0         0 my @nrow;
2383 0         0 for my $row (@old_rows) {
2384 0   0     0 push @nrow, $row->[$ocx] // "";
2385             }
2386 0         0 push @$rows, \@nrow;
2387             }
2388 0 0       0 if ($$self->{saved_linenums}) {
2389 0         0 @$linenums = @{ $$self->{saved_linenums} };
  0         0  
2390 0         0 delete $$self->{saved_linenums};
2391             } else {
2392 0         0 $$self->{saved_linenums} = [ @$linenums ];
2393 0         0 @$linenums = ("?") x scalar @$rows;
2394             }
2395 0   0     0 $$self->{data_source} //= "";
2396 0         0 $$self->{data_source} .= " transposed";
2397              
2398 0         0 $self->_rows_replaced;
2399 0         0 $self
2400             }#transpose
2401              
2402             # delete_rows rx ...
2403             # delete_rows 'LAST' ...
2404             # delete_rows '$' ...
2405             sub delete_rows(@) {
2406 0     0 1 0 my $self = &__selfmust;
2407 0         0 my (@rowspecs) = @_;
2408              
2409             my ($rows, $linenums, $title_rx, $current_rx, $verbose)
2410 0         0 = @$$self{qw/rows linenums title_rx current_rx verbose/};
2411              
2412 0         0 foreach (@rowspecs) {
2413 0 0       0 $_ = $#$rows if /^(?:LAST|\$)$/;
2414 0         0 __validate_nonnegi($_, "rx to delete");
2415 0 0       0 croak "Invalid row index '$_'\n" unless $_ <= $#$rows;
2416             }
2417 0         0 my @rev_sorted_rxs = sort {$b <=> $a} @rowspecs;
  0         0  
2418 0 0       0 log_methcall $self, [reverse @rev_sorted_rxs] if $$self->{verbose};
2419              
2420             # Adjust if needed...
2421 0 0       0 if (defined $title_rx) {
2422 0         0 foreach (@rev_sorted_rxs) {
2423 0 0       0 if ($_ < $title_rx) { --$title_rx }
  0 0       0  
2424             elsif ($_ == $title_rx) {
2425             $self->_log("Invalidating titles because rx $title_rx is being deleted\n")
2426 0 0       0 if $$self->{verbose};
2427 0         0 $title_rx = undef;
2428 0         0 last;
2429             }
2430             }
2431 0         0 $$self->{title_rx} = $title_rx;
2432             }
2433              
2434             # Back up $current_rx to account for deleted rows.
2435             # $current_rx is left set to one less than the index of the "next" row if
2436             # we are in an apply(). That is, current_rx will be left still pointing to
2437             # the same row as before, or if that row has been deleted then the row
2438             # before that (or -1 if row zero was deleted).
2439 0 0       0 if (defined $current_rx) {
2440 0         0 foreach (@rev_sorted_rxs) {
2441 0 0       0 --$current_rx if ($_ <= $current_rx);
2442             }
2443 0         0 $$self->{current_rx} = $current_rx;
2444             }
2445              
2446             #warn "### BEFORE delete_rows rx (@rev_sorted_rxs):\n",
2447             # map( { " [$_]=(".join(",",@{$rows->[$_]}).")\n" } 0..$#$rows);
2448              
2449 0         0 for my $rx (@rev_sorted_rxs) {
2450 0         0 splice @$rows, $rx, 1, ();
2451 0         0 splice @$linenums, $rx, 1, ();
2452             }
2453              
2454             #warn "### AFTER delete_rows:\n",
2455             # map( { " [$_]=(".join(",",@{$rows->[$_]}).")\n" } 0..$#$rows);
2456             $self
2457 0         0 }#delete_rows
2458 0     0 0 0 sub delete_row($) { goto &delete_rows; }
2459              
2460             # $firstrx = insert_rows [rx [,count]]
2461             # $firstrx = insert_rows ['$'[,count]]
2462             sub insert_rows(;$$) {
2463 0     0 1 0 my $self = &__selfmust;
2464 0         0 my ($rx, $count) = @_;
2465 0   0     0 $rx //= 'END';
2466 0   0     0 $count //= 1;
2467              
2468             my ($rows, $linenums, $num_cols, $title_rx)
2469 0         0 = @$$self{qw/rows linenums num_cols title_rx/};
2470              
2471 0 0       0 $rx = @$rows if $rx =~ /^(?:END|\$)$/;
2472              
2473             log_methcall $self, [\"at rx $rx (count $count)"], [$rx]
2474 0 0       0 if $$self->{verbose};
2475              
2476 0         0 __validate_nonnegi($rx, "new rx");
2477              
2478 0 0 0     0 if (defined($title_rx) && $rx <= $title_rx) {
2479 0         0 $$self->{title_rx} = ($title_rx += $count);
2480             }
2481              
2482 0         0 for (1..$count) {
2483 0         0 splice @$rows, $rx, 0, [("") x $num_cols];
2484 0         0 splice @$linenums, $rx, 0, "??";
2485             }
2486              
2487 0         0 $rx;
2488             }
2489 0     0 1 0 sub insert_row(;$) { goto &insert_rows; }
2490              
2491             # read_spreadsheet $inpath [Spreadsheet::Edit::IO::OpenAsCSV options...]
2492             # read_spreadsheet $inpath [,iolayers =>... or encoding =>...]
2493             # read_spreadsheet $inpath [,{iolayers =>... or encoding =>... }] #OLD API
2494             # read_spreadsheet [{iolayers =>... or encoding =>... }, ] $inpath #NEW API
2495             #
2496             # Titles are auto-detected by default, but this may be controlled
2497             # via {OPTIONS}:
2498             # title_rx => rx # Don't autodetect; set the specified title_rx
2499             # title_rx => undef # Don't autodetect; no titles
2500             # OR: autodetect options, passed thru to title_rx()
2501             #
2502             sub read_spreadsheet($;@) {
2503 2     2 1 14 my ($self, $opthash, $inpath) = &__self_opthash_1arg;
2504 2         6 my $orig_opthash = { %$opthash };
2505              
2506 2         8 my $saved_stdopts = $self->_set_stdopts($opthash);
2507 2     2   14 scope_guard{ $self->_restore_stdopts($saved_stdopts) };
  2         5  
2508              
2509 2         22 my %csvopts = @sane_CSV_read_options;
2510             # Separate out Text::CSV options from %$opthash
2511 2         11 foreach my $key (Text::CSV::known_attributes()) {
2512             #$csvopts{$key} = delete $opthash{$key} if exists $opthash{$key};
2513 62 50       180 $csvopts{$key} = $opthash->{$key} if defined $opthash->{$key};
2514 62         96 delete $opthash->{$key};
2515             }
2516 2         14 $csvopts{escape_char} = $csvopts{quote_char}; # " : """
2517              
2518             croak "Obsolete {sheet} key in options (use 'sheetname')"
2519 2 50       7 if exists $opthash->{sheet};
2520              
2521 2         16 __validate_opthash( $opthash,
2522             [
2523             qw/title_rx/,
2524             qw/iolayers encoding verbose silent debug/,
2525             qw/tempdir use_gnumeric raw_values sheetname/, # for OpenAsCsv
2526             qw/required min_rx max_rx first_cx last_cx/, # for title_rx
2527             ],
2528             desc => "read_spreadsheet option",
2529             undef_ok_only => [qw/title_rx iolayers encoding verbose silent debug
2530             use_gnumeric/] );
2531              
2532             # convert {encoding} to {iolayers}
2533 2 50       8 if (my $enc = delete $opthash->{encoding}) {
2534             #warn "Found OBSOLETE read_spreadsheet 'encoding' opt (use iolayers instead)\n";
2535 0   0     0 $opthash->{iolayers} = ($opthash->{iolayers}//"") . ":encoding($enc)";
2536             }
2537             # Same as last-used, if any
2538             # N.B. If user says nothing, OpenAsCsv() defaults to UTF-8
2539 2   50     35 $opthash->{iolayers} //= $$self->{iolayers} // "";
      33        
2540              
2541             my ($rows, $linenums, $meta_info, $verbose, $debug)
2542 2         7 = @$$self{qw/rows linenums meta_info verbose debug/};
2543              
2544             ##$self->_check_currsheet;
2545              
2546 2         4 my $hash;
2547 2         2 { local $$self->{verbose} = 0;
  2         6  
2548             $hash = OpenAsCsv(
2549             inpath => $inpath,
2550             debug => $$self->{debug},
2551             silent => $$self->{silent},
2552 2   33     22 verbose => ($$self->{verbose} || $$self->{debug}),
2553             %$opthash, # all our opts are valid here
2554             );
2555             }
2556              
2557             ### TODO: Split off the following into a separate read_csvdata() method
2558             ### which takes a file handle? This might be useful so users
2559             ### can open arbitrary sources (even a pipe) and parse the data
2560             ### (e.g. /etc/passwd with : as the separator).
2561             ### ...but unclear how to handle encoding
2562              
2563             # Save possibly-defaulted iolayers for use in subsequent write_csv
2564 2   33     16 $$self->{iolayers} //= $hash->{iolayers};
2565              
2566 2         3 my $fh = $hash->{fh};
2567              
2568 2         4 $csvopts{keep_meta_info} = 1;
2569 2 50       11 my $csv = Text::CSV->new (\%csvopts)
2570             or croak "read_spreadsheet: ".Text::CSV->error_diag ()
2571             .dvis('\n## %csvopts\n');
2572              
2573 2         425 undef $$self->{num_cols};
2574 2         22 @$rows = ();
2575 2         5 @$linenums = ();
2576 2         3 my $lnum = 1;
2577 2         73 while (my $F = $csv->getline( $fh )) {
2578 6         414 push(@$linenums, $lnum);
2579 6         39 my @minfo = $csv->meta_info();
2580             # Force quoting of fields which look like negative numbers with an ascii
2581             # minus (\x{2D}) rather than Unicode math minus (\N{U+2212}).
2582             # This prevents conversion to the Unicode math minus when LibreOffice
2583             # reads the CSV. The assumption is that if the input, when converted
2584             # TO a csv, has an ascii minus then the original spreadsheet cell format
2585             # was "text" not numeric.
2586 6         63 for my $cx (0..$#$F) {
2587             #...TODO $minfo[$cx] |= 0x0001 if $F->[$cx] =~ /^-[\d.]+$/a;
2588             }
2589 6         13 push(@$meta_info, \@minfo);
2590 6         16 $lnum = $.+1;
2591 6         22 push(@$rows, $F);
2592             }
2593 2 50       221 close $fh || croak "Error reading $hash->{csvpath}: $!\n";
2594              
2595             $$self->{data_source} =
2596 2         18 form_spec_with_sheetname($hash->{inpath}, $hash->{sheetname});
2597 2         11 $$self->{sheetname} = $hash->{sheetname}; # possibly undef
2598              
2599 2         7 $self->_rows_replaced;
2600              
2601             # Set title_rx, either to a value explicitly given in OPTIONS (possibly
2602             # undef, meaning no titles) or else auto-detect.
2603 2         3 my %autodetect_opts;
2604 2         6 foreach (qw/required min_rx max_rx first_cx last_cx/) {
2605 10 50       38 $autodetect_opts{$_} = $opthash->{$_} if exists($opthash->{$_});
2606             }
2607              
2608 2 100       6 my $arg = exists($opthash->{title_rx}) ? $opthash->{title_rx} : 'auto';
2609 2         4 { local $$self->{cmd_nesting} = $$self->{cmd_nesting} + 1;
  2         8  
2610 2         6 $autodetect_opts{verbose} = 0; # suppress logging
2611 2         7 $self->title_rx(\%autodetect_opts, $arg);
2612             }
2613              
2614             log_methcall $self, [$orig_opthash, $inpath,
2615             \" [title_rx set to ",vis($$self->{title_rx}),\"]"]
2616 2 50       11 if $$self->{verbose};
2617              
2618 2         25 $self
2619             }#read_spreadsheet
2620              
2621             # write_csv {OPTHASH} "/path/to/output.csv"
2622             # Cells will be quoted if the input was quoted, i.e. if indicated by meta_info.
2623             sub write_csv(*;@) {
2624 0     0 1 0 my $self = &__selfmust;
2625 0 0       0 my $opts = ref($_[0]) eq 'HASH' ? shift() : {};
2626 0         0 my $dest = shift;
2627              
2628 0         0 my %csvopts = ( @sane_CSV_write_options,
2629             quote_space => 0, # dont quote embedded spaces
2630             );
2631             # Separate out Text::CSV options from {OPTIONS}
2632 0         0 foreach my $key (Text::CSV::known_attributes()) {
2633 0 0       0 $csvopts{$key} = $opts->{$key} if defined $opts->{$key};
2634 0         0 delete $opts->{$key};
2635             }
2636              
2637 0         0 { my %notok = %$opts;
  0         0  
2638 0         0 delete $notok{$_} foreach (
2639             #removed above... Text::CSV::known_attributes(),
2640             qw/verbose silent debug/,
2641             );
2642 0 0       0 croak "Unrecognized OPTION(s): ",avislq(keys %notok) if %notok;
2643             }
2644              
2645 0   0     0 $opts->{iolayers} //= $$self->{iolayers} // "";
      0        
2646             # New API: opts->{iolayers} may have all 'binmode' arguments.
2647             # If it does not include encoding(...) then insert default
2648 0 0       0 if ($opts->{iolayers} !~ /encoding\(|:utf8/) {
2649 0   0     0 $opts->{iolayers} .= ":encoding(".
2650             ($self->input_encoding() || DEFAULT_WRITE_ENCODING)
2651             .")";
2652             }
2653 0 0       0 if ($opts->{iolayers} !~ /:(?:crlf|raw)\b/) {
2654             # Use platform default
2655             #$opts->{iolayers} .= ":crlf";
2656             }
2657              
2658             my ($rows, $meta_info, $num_cols, $verbose, $debug)
2659 0         0 = @$$self{qw/rows meta_info num_cols verbose debug/};
2660              
2661 0         0 my $fh;
2662 0 0       0 if (openhandle($dest)) { # an already-open file handle?
2663             log_methcall $self, [$opts, "<file handle specified> $opts->{iolayers} "
2664             .scalar(@$rows)." rows, $num_cols columns)"]
2665 0 0       0 if $$self->{verbose};
2666 0         0 $fh = $dest;
2667             } else {
2668             log_methcall $self, [$opts, $dest." $opts->{iolayers} ("
2669             .scalar(@$rows)." rows, $num_cols columns)"]
2670 0 0       0 if $$self->{verbose};
2671 0 0 0     0 croak "Output path suffix must be *.csv, not\n ",qsh($dest),"\n"
2672             if $dest =~ /\.([a-z]*)$/ && lc($1) ne "csv";
2673 0 0       0 open $fh,">$dest" or croak "$dest: $!\n";
2674             }
2675              
2676 0 0       0 binmode $fh, $opts->{iolayers} or die "binmode:$!";
2677              
2678             # Arrgh. Although Text::CSV is huge and complex and implements a complicated
2679             # meta_info mechanism to capture quoting details on input, there is no way to
2680             # use the captured info to specify quoting of output fields!
2681             # So we implement writing CSVs by hand here.
2682             #my $csv = Text::CSV->new (\%csvopts)
2683             # or die "write_csv: ".Text::CSV->error_diag ();
2684             #foreach my $row (@$rows) {
2685             # oops "UNDEF row" unless defined $row; # did user modify @rows?
2686             # $csv->print ($fh, $row);
2687             #};
2688              
2689             # 5/2/22 FIXME: Maybe meta_info could be used when writing, albiet in
2690             # a grotesque way:
2691             # If keep_meta_info is set > 9, then the output quotation style is
2692             # "like it was used in the input of the the last parsed record"; so
2693             # we could "parse" a dummy record to set the quote style before writing
2694             # each record, like this (see perldoc Text::CSV_XS "keep_meta_info"):
2695             # my $csv = Text::CSV_XS->new({ binary=>1, keep_meta_info=>11,
2696             # quote_space => 0 });
2697             # apply_all {
2698             # my $minfo = $meta_info[$rx];
2699             # my @dummy = map{ '', 'x', '""' or '"x'' } @$minfo; # HOW?
2700             # $csv->parse(join ",", @dummy); # set saved meta_info
2701             # $csv->print(*OUTHANDLE, $row);
2702             # }
2703             #
2704              
2705             # Much of the option handling code was copied from Text::CSV_PP.pm
2706             # which depends on default values of options we don't specify explicitly.
2707             # So create a Text::CSV object just to get the effective option values...
2708 0         0 { my $o = Text::CSV->new( \%csvopts );
  0         0  
2709 0         0 foreach my $key (Text::CSV::known_attributes()) {
2710 0         0 $csvopts{$key} = $o->{$key};
2711             }
2712             }
2713              
2714             my $re_esc = ($csvopts{escape_char} ne '' and $csvopts{escape_char} ne "\0")
2715 0 0 0     0 ? ($csvopts{quote_char} ne '') ? qr/(\Q$csvopts{quote_char}\E|\Q$csvopts{escape_char}\E)/ : qr/(\Q$csvopts{escape_char}\E)/
    0          
2716             : qr/(*FAIL)/;
2717 0         0 for my $rx (0..$#$rows) {
2718 0         0 my $row = $rows->[$rx];
2719 0         0 my $minfo = $meta_info->[$rx];
2720 0         0 my @results;
2721 0         0 for my $cx (0..$num_cols-1) {
2722 0         0 my $value = $row->[$cx];
2723 0 0       0 confess "ERROR: rx $rx, cx $cx : undef cell value" unless defined($value);
2724 0         0 my $mi = $minfo->[$cx]; # undef if input was missing columns in this row
2725             my $must_be_quoted = $csvopts{always_quote} ||
2726 0   0     0 (($mi//0) & 0x0001); # was quoted on input
2727 0 0       0 unless ($must_be_quoted) {
2728 0 0       0 if ($value eq '') {
2729 0 0       0 $must_be_quoted = 42 if $csvopts{quote_empty};
2730             } else {
2731 0 0       0 if ($csvopts{quote_char} ne '') {
2732 3     3   32 use bytes;
  3         7  
  3         32  
2733             $must_be_quoted=43 if
2734             ($value =~ /\Q$csvopts{quote_char}\E/) ||
2735             ($csvopts{sep_char} ne '' and $csvopts{sep_char} ne "\0" and $value =~ /\Q$csvopts{sep_char}\E/) ||
2736             ($csvopts{escape_char} ne '' and $csvopts{escape_char} ne "\0" and $value =~ /\Q$csvopts{escape_char}\E/) ||
2737             ($csvopts{quote_binary} && $value =~ /[\x00-\x1f\x7f-\xa0]/) ||
2738 0 0 0     0 ($csvopts{quote_space} && $value =~ /[\x09\x20]/);
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
2739             }
2740             }
2741             }
2742 0         0 $value =~ s/($re_esc)/$csvopts{escape_char}$1/g;
2743 0 0       0 if ($csvopts{escape_null}) {
2744 0         0 $value =~ s/\0/$csvopts{escape_char}0/g;
2745             }
2746 0 0       0 if ($must_be_quoted) {
2747 0         0 $value = $csvopts{quote_char} . $value . $csvopts{quote_char};
2748             }
2749 0 0       0 $fh->print($csvopts{sep_char}) unless $cx==0;
2750 0         0 $fh->print($value);
2751             }
2752 0         0 $fh->print($csvopts{eol});
2753             }
2754              
2755 0 0       0 if (! openhandle $dest) {
2756 0 0       0 close $fh || croak "Error writing $dest : $!\n";
2757             }
2758             $self
2759 0         0 }#write_csv
2760              
2761             # Write spreadsheet with specified column formats
2762             # {col_formats} is required
2763             # Unless {sheetname} is specified, the sheet name is the outpath basename
2764             # sans any suffix
2765             sub write_spreadsheet(*;@) {
2766 0     0 1 0 my ($self, $opts, $outpath) = &__self_opthash_1arg;
2767 0         0 my $colx = $$self->{colx};
2768              
2769 0 0       0 log_methcall $self, [$opts, $outpath] if $$self->{verbose};
2770              
2771             # {col_formats} may be [list of formats in column order]
2772             # or { COLSPEC => fmt, ..., __DEFAULT__ => fmt }
2773             # Transform the latter to the former...
2774 0   0     0 my $cf = $opts->{col_formats} // croak "{col_formats} is required";
2775 0 0       0 if (ref($cf) eq "HASH") {
2776 0         0 my ($default, @ary);
2777 0         0 while (my ($key, $fmt) = each %$cf) {
2778 0 0       0 ($default = $fmt),next if $key eq "__DEFAULT__";
2779 0   0     0 my $cx = $colx->{$key} // croak("Invalid COLSPEC '$key' in col_formats");
2780 0         0 $ary[$cx] = $fmt;
2781             }
2782 0 0       0 foreach (@ary) { $_ = $default if ! defined; }
  0         0  
2783 0         0 $cf = \@ary;
2784             }
2785 0         0 local $opts->{col_formats} = $cf;
2786              
2787 0         0 my ($csvfh, $csvpath) = tempfile(SUFFIX => ".csv");
2788 0         0 { local $$self->{verbose} = 0;
  0         0  
2789 0         0 $self->write_csv($csvfh, silent => 1, iolayers => ':encoding(UTF-8)',
2790             @sane_CSV_write_options);
2791             }
2792 0 0       0 close $csvfh or die "Error writing $csvpath : $!";
2793              
2794             # Default sheet name to output file basename sans suffix
2795 0   0     0 $opts->{sheetname} //= fileparse($outpath, qr/\.\w+/);
2796              
2797 0         0 convert_spreadsheet($csvpath,
2798             %$opts,
2799             iolayers => ':encoding(UTF-8)',
2800             cvt_from => "csv",
2801             outpath => $outpath,
2802             );
2803 0         0 $self
2804             }
2805              
2806             #====================================================================
2807             # These helpers are used by predefined magic sheet variables.
2808             # See code in Spreadsheet::Edit::import()
2809              
2810             # Return $self if during an apply, or if being examined by Data::Dumper ;
2811             # otherwise croak
2812             sub _onlyinapply {
2813 7     7   20 my ($self, $accessor) = @_;
2814 7 50       33 unless (defined $$self->{current_rx}) {
2815 0         0 for (my $lvl=2; ;$lvl++) {
2816 0   0     0 my $pkg = (caller($lvl))[0] || last;
2817 0 0 0     0 return $self
2818             if defined($pkg) && $pkg->isa("Data::Dumper") # perldoc UNIVERSAL
2819             }
2820 0         0 croak "Can't use $accessor now: Not during apply*\n"
2821             }
2822             $self
2823 7         22 }
2824             sub __getsheet($$$$) {
2825 9     9   31 my ($mutating, $pkg, $uvar, $onlyinapply) = @_;
2826 9         39 my $sheet = $pkg2currsheet{$pkg};
2827 9 50       42 croak("Modifying variable $uvar is not allowed\n")
2828             if $mutating;
2829 9 50       36 croak("Can not use $uvar: No sheet is currently valid for package $pkg\n")
2830             unless defined $sheet;
2831 9 100       65 $onlyinapply ? _onlyinapply($sheet, $uvar) : $sheet
2832             }
2833             sub _scal_tiehelper { # access a scalar sheet variable
2834 5     5   641 my($mutating, $pkg, $uvar, $ident, $onlyinapply) = @_;
2835 5         29 my $sheet = __getsheet($mutating, $pkg, $uvar, $onlyinapply);
2836 5 50       14 confess avisq(@_) unless exists $$sheet->{$ident};
2837 5         36 return \$$sheet->{$ident}; # return ref to the scalar
2838             }
2839             sub _aryelem_tiehelper { # access an element of an array sheet variable
2840 0     0   0 my($mutating, $pkg, $uvar, $index_ident, $array_ident, $onlyinapply) = @_;
2841             # E.g. for $title_row : index_ident="title_rx" and array_ident="rows"
2842 0         0 my $sheet = __getsheet($mutating, $pkg, $uvar, $onlyinapply);
2843 0   0     0 my $aref = $$sheet->{$array_ident} // oops dvisq '$array_ident @_'; # e.g. {rows}
2844 0   0     0 my $index = $$sheet->{$index_ident} // do{
2845 0 0 0     0 if ($index_ident eq "current_rx" or $index_ident eq "title_rx") {
2846             return \undef # During Data::Dumper inspection of current_row?
2847 0         0 }
2848 0         0 oops dvis '$array_ident $index_ident'; # otherwise it's a bug
2849             };
2850 0 0       0 oops(dvisq '@_ $index') if $index > $#$aref;
2851 0         0 return \$aref->[$index]; # return ref to scalar (the element in the array)
2852             }
2853             sub _refval_tiehelper { # access a sheet variable which is a ref of some kind
2854 4     4   1612223 my($mutating, $pkg, $uvar, $field_ident, $onlyinapply, $mutable) = @_;
2855 4 50       27 $mutating = 0 if $mutable;
2856 4         34 my $sheet = __getsheet($mutating, $pkg, $uvar, $onlyinapply);
2857 4         142 return $$sheet->{$field_ident}; # return the value, which is itself a ref
2858             }
2859              
2860             # Retrieve the sheet currently accessed by the functional API & tied globals
2861             # in the caller's package (or the specified package).
2862             # If an argument is passed, change the sheet to the specified sheet.
2863             #
2864             # Always returns the previous sheet (or undef)
2865             sub sheet(;$$) {
2866 14     14 1 536 my $opthash = &__opthash;
2867 14   66     78 my $pkg = $opthash->{package} // caller();
2868 14 50       44 oops if index($pkg,__PACKAGE__) >= 0; # not us or sub-pkg
2869 14 100       43 my $pkgmsg = $opthash->{package} ? " [for pkg $pkg]" : "";
2870 14         34 my $curr = $pkg2currsheet{$pkg};
2871 14   33     77 my $verbose = $opthash->{verbose} || ($curr && $$curr->{verbose});
2872 14 100       36 if (@_) {
2873 12         40 my $new = __validate_sheet_arg(shift @_);
2874 12 50       35 croak "Extraneous argument(s) in call to sheet()" if @_;
2875 12 100       32 if (defined $new) {
2876 9 50       90 oops if $$new->{cmd_nesting};
2877 9   33     53 $verbose ||= $$new->{verbose};
2878             }
2879              
2880 12 0       30 log_call [$opthash, \(" ".fmt_sheet($new)),
    50          
2881             \(u($curr) eq u($new)
2882             ? " [no change]"
2883             : " [previous: ".fmt_sheet($curr)."]"),
2884             \$pkgmsg]
2885             if $verbose;
2886              
2887 12         31 $pkg2currsheet{$pkg} = $new;
2888             } else {
2889 2 50       6 log_call [$opthash], [\fmt_sheet($curr), \$pkgmsg]
2890             if $verbose;
2891             }
2892 14         121 $curr
2893             }
2894              
2895             #====================================================================
2896             package
2897             Spreadsheet::Edit::RowsTie; # implements @rows and @$sheet
2898 3     3   6570 use parent 'Tie::Array';
  3         7  
  3         26  
2899              
2900 3     3   2095 use Carp;
  3         10  
  3         320  
2901             #our @CARP_NOT = qw(Tie::Indirect Tie::Indirect::Array
2902             # Tie::Indirect::Hash Tie::Indirect::Scalar);
2903 3         36 use Data::Dumper::Interp 6.004 qw/visnew
2904             vis viso avis alvis ivis dvis hvis hlvis
2905             visq visoq avisq alvisq ivisq dvisq hvisq hlvisq
2906 3     3   29 addrvis rvis rvisq u quotekey qsh qshlist qshpath/;
  3         82  
2907              
2908 3     3   4651 use Scalar::Util qw(looks_like_number weaken);
  3         8  
  3         2277  
2909 0     0   0 sub oops(@) { goto &Spreadsheet::Edit::oops }
2910              
2911             sub TIEARRAY {
2912 4     4   16 my ($classname, $sheet) = @_;
2913 4         12 my $o = bless [ [], $sheet], $classname;
2914 4         32 weaken $o->[1];
2915 4         10 $o
2916             }
2917             sub FETCH {
2918 47     47   92 my ($this, $index) = @_;
2919 47         73 my $aref = $this->[0];
2920 47 50 33     171 croak "Index ",u($index)," is invalid or out of range"
2921             unless $index >= 0 && $index <= $#$aref;
2922 47         179 $aref->[$index];
2923             }
2924             sub STORE {
2925 12     12   59 my ($this, $index, $val) = @_;
2926 12         23 my ($aref, $sheet) = @$this;
2927 12 50 33     53 croak "Index ",u($index)," is invalid or out of range"
2928             unless $index >= 0 && $index <= $#$aref+1;
2929 12 50       31 croak "Value must be a ref to array of cell values (not $val)"
2930             if ! Spreadsheet::Edit::__looks_like_aref($val);
2931             croak "Cell values may not be undef"
2932 12 50       24 if grep{! defined} @$val;
  30         77  
2933             croak "Cell values must be strings or numbers"
2934 12 50       17 if grep{ ref($_) && !looks_like_number($_) } @$val;
  30 50       78  
2935 12 50       96 if (my $num_cols = $$sheet->{num_cols}) {
2936 0 0       0 croak "New row must contain $num_cols cells (not ", $#$val+1, ")"
2937             if @$val != $num_cols;
2938             }
2939             # else (0 or undef) someone promises to set it later
2940              
2941             # Store a *copy* of the data to dispose of a Magicrow wrapper, if present
2942 12         39 my $cells = [ @$val ];
2943 12         47 $aref->[$index] = Spreadsheet::Edit::Magicrow->new($sheet, $cells);
2944             }
2945 246     246   433 sub FETCHSIZE { scalar @{ $_[0]->[0] } }
  246         970  
2946             sub STORESIZE {
2947 2     2   11 my ($this, $newlen) = @_;
2948 2         6 $#{ $this->[0] } = $newlen-1;
  2         10  
2949             }
2950             # End packageSpreadsheet::Edit::RowsTie
2951              
2952             #====================================================================
2953             package
2954             Spreadsheet::Edit::Magicrow;
2955              
2956 3     3   28 use Carp;
  3         8  
  3         289  
2957             our @CARP_NOT = qw(Spreadsheet::Edit);
2958 3     3   25 use Scalar::Util qw(weaken blessed looks_like_number);
  3         6  
  3         344  
2959 0     0   0 sub oops(@) { goto &Spreadsheet::Edit::oops }
2960 3     3   25 use Data::Dumper::Interp;
  3         7  
  3         20  
2961              
2962             sub new {
2963 12     12   25 my ($classname, $sheet, $cells) = @_;
2964 12         19 my %hashview; tie %hashview, __PACKAGE__, $cells, $sheet;
  12         34  
2965 12         56 bless \ [$cells, \%hashview], $classname;
2966             }
2967 110     110   158 use overload '@{}' => sub { ${ shift() }->[0] },
  110         376  
2968 4     4   12 '%{}' => sub { ${ shift() }->[1] },
  4         60  
2969             #'""' => sub { shift }, # defeats vis overload eval!
2970             #'0+' => sub { shift },
2971             #'==' => sub { my ($self, $other, $swap) = @_; $self == $other },
2972             #'eq' => sub { my ($self, $other, $swap) = @_; "$self" eq "$other" },
2973 3         26 fallback => 1, # for "" etc. FIXME: is this really ok?
2974 3     3   1882 ;
  3         10  
2975              
2976             sub TIEHASH {
2977 12     12   34 my ($pkg, $cells, $sheet) = @_;
2978 12         27 my $o = bless \ [$cells, $sheet], $pkg;
2979 12         101 weaken $$o->[1];
2980 12         31 $o
2981             }
2982             sub _cellref {
2983 4     4   8 my ($cells, $sheet) = @{ ${ shift() } }; # First arg is 'self'
  4         7  
  4         13  
2984 4         14 my $key = shift; # Second arg is key
2985 4         8 my $mutating = @_; # Third arg exists only for STORE
2986 4         10 my $colx = $$sheet->{colx};
2987 4         16 my $cx = $colx->{$key};
2988              
2989 4 50       21 if (! defined $cx) {
2990 0 0       0 exists($colx->{$key})
2991             or croak "'$key' is an unknown COLSPEC. The valid keys are:\n",
2992             $sheet->_fmt_colx();
2993             # Undef colx results from alias({optional => TRUE},...) which failed,
2994             # or from an alias which became invalid because the column was deleted.
2995 0 0       0 croak "Attempt to write to 'optional' alias '$key' which is currently NOT DEFINED"
2996             if $mutating;
2997             return \undef # Reading such a column returns undef
2998 0         0 }
2999 4   33     6 $cx <= $#{$cells}
  4         17  
3000             // croak "BUG?? key '$key' maps to cx $cx which is out of range!";
3001 4         45 \$cells->[$cx]
3002             }
3003             sub FETCH {
3004 4     4   22 ${ &_cellref }
  4         15  
3005             }
3006             sub STORE {
3007 0     0     my $r = &_cellref;
3008 0           $$r = shift;
3009             }
3010             sub NEXTKEY {
3011 0     0     my (undef, $sheet) = @{ ${ shift() } };
  0            
  0            
3012 0           each %{ $$sheet->{colx} }
  0            
3013             }
3014             sub FIRSTKEY {
3015 0     0     my (undef, $sheet) = @{ ${ shift() } };
  0            
  0            
3016 0           my $colx = $$sheet->{colx};
3017 0           my $a = scalar keys %$colx; # reset iterator
3018 0           each %$colx;
3019             }
3020             sub EXISTS {
3021 0     0     my (undef, $sheet) = @{ ${ shift() } };
  0            
  0            
3022 0           my $key = shift;
3023 0           exists $$sheet->{colx}->{$key}
3024             }
3025             sub SCALAR {
3026 0     0     my (undef, $sheet) = @{ ${ shift() } };
  0            
  0            
3027 0           scalar %{ $$sheet->{colx} }
  0            
3028             }
3029 0     0     sub DELETE { confess "DELETE not allowed for ".__PACKAGE__ }
3030 0     0     sub CLEAR { confess "CLEAR not allowed for ".__PACKAGE__ }
3031              
3032             # End package Spreadsheet::Edit::Magicrow;
3033             #====================================================================
3034              
3035             1;
3036             __END__
3037              
3038             =pod
3039              
3040             =encoding UTF-8
3041              
3042             =head1 NAME
3043              
3044             Spreadsheet::Edit - Slice and dice spreadsheets, optionally using tied variables.
3045              
3046             =head1 NON-OO SYNOPSIS
3047              
3048             use Spreadsheet::Edit qw(:all);
3049              
3050             # Examples assume a spreadsheet with these titles in the first row:
3051             # "Account Number" "Customer's Name" "Email" "Home-phone" "Income"
3052              
3053             read_spreadsheet "mailing_list.xls!Sheet1";
3054              
3055             # alias an identifier to a long or complicated title
3056             alias Name => qr/customer/i; # matches "Customer's Name"
3057              
3058             # ------------ without tied column variables -----------
3059              
3060             # Print the data
3061             printf "%20s %8s %8s %-13s %s\n", "Name","A/N","Income","Phone","Email";
3062             apply {
3063             printf "%20s %8d %8.2f %-13s %s\n",
3064             $crow{Name}, # this key is an explicit alias
3065             $crow{"Account Number"}, # ...actual title
3066             $crow{Income}, # ...actual title
3067             $crow{Home_phone}, # ...auto-generated alias
3068             $crow{Email} ; # ...actual title
3069             };
3070              
3071             # Randomly access rows.
3072             print "Row 42: Column 'C' is ", $rows[41]{C}, "\n";
3073             print "Row 42: Customer's Name is ", $rows[41]{Name}, "\n";
3074             print "Row 42: 3rd column is ", $rows[41][2], "\n";
3075              
3076             # Split the "Customer's Name" into separate FName and LName columns
3077             insert_cols '>Name', "FName", "LName";
3078             apply {
3079             ($crow{FName}, $crow{LName}) = ($crow{Name} =~ /(.*) (.*)/)
3080             or die logmsg "Could not parse Name"; # logmsg adds current row number
3081             };
3082             delete_cols "Name";
3083              
3084             # Sort by last name
3085             sort_rows { $a->{LName} cmp $b->{LName} };
3086              
3087             # ------------ using tied column variables -----------
3088              
3089             our $Name; # 'Name' is the explicit alias created above
3090             our $Account_Number; # Auto-generated alias for "Account Number"
3091             our $Home_phone; # ditto
3092             our $Income; # 'Income' is an actual title
3093             our $Email; # ditto
3094             our ($FName, $LName); # These columns do not yet exist
3095              
3096             tie_column_vars "Name", "Account_Number",
3097             qr/phone/, qr/^inc/i, "FName", "LName";
3098              
3099             # Print the data
3100             printf "%20s %8s %8s %-13s %s\n", "Name","A/N","Income","Phone","Email";
3101             apply {
3102             printf "%20s %8d %8.2f %-13s%s\n",
3103             $Name, $Account_Number, $Income, $Home_phone, $Email;
3104             };
3105              
3106             # Split the "Customer's Name" into separate FName and LName columns
3107             insert_cols '>Name', "FName", "LName";
3108             apply {
3109             ($FName, $LName) = ($Name =~ /^(\S+) (\S+)$/)
3110             or die logmsg "Could not parse Name";
3111             };
3112             delete_cols "Name";
3113              
3114             # Simple mail-merge
3115             use POSIX qw(strftime);
3116             apply {
3117             return
3118             if $Income < 100000; # not in our audience
3119             open SENDMAIL, "|sendmail -t -oi" || die "pipe:$!";
3120             print SENDMAIL "To: $FName $LName <$Email>\n";
3121             print SENDMAIL strftime("Date: %a, %d %b %y %T %z\n", localtime(time));
3122             print SENDMAIL <<EOF ;
3123             From: sales\@example.com
3124             Subject: Help for the 1%
3125              
3126             Dear $FName,
3127             If you have disposable income, we can help with that.
3128             Sincerely,
3129             Your investment advisor.
3130             EOF
3131             close SENDMAIL || die "sendmail failed ($?)\n";
3132             };
3133              
3134             # ------------ multiple sheets --------------
3135              
3136             our ($Foo, $Bar, $Income);
3137              
3138             read_spreadsheet "file1.csv";
3139             tie_column_vars; # tie all vars that ever become valid
3140              
3141             my $s1 = sheet undef ; # Save ref to current sheet & forget it
3142              
3143             read_spreadsheet "file2.csv"; # Auto-creates sheet bc current is undef
3144             tie_column_vars;
3145              
3146             my $s2 = sheet ; # Save ref to second sheet
3147              
3148             print "$Foo $Bar $Income\n"; # these refer to $s2, the current sheet
3149              
3150             sheet $s1 ;
3151             print "$FName $LName $Income\n"; # these now refer to the original sheet
3152              
3153             # ------------ create sheet from memory --------------
3154              
3155             my $s3 = new_sheet
3156             data_source => "my own data",
3157             rows => [
3158             ["This is a row before the title row" ],
3159             ["Full Name", "Address", "City", "State", "Zip" ],
3160             ["Joe Smith", "123 Main St", "Boston", "CA", "12345"],
3161             ["Mary Jones", "999 Olive Drive", "Fenton", "OH", "67890"],
3162             ],
3163             ;
3164             $s3->title_rx(1);
3165             ...
3166              
3167             =head1 OO SYNOPSIS
3168              
3169             use Spreadsheet::Edit ();
3170              
3171             my $sheet = Spreadsheet::Edit->new();
3172             $sheet->read_spreadsheet("mailing_list.xls!sheet name");
3173             $sheet->alias( Name => qr/customer/i ); # matches "Customer's Name"
3174              
3175             # Randomly access rows.
3176             # Sheet objects, when used as an ARRAYref, act like \@rows
3177             print "Row 42: Name is ", $sheet->[41]{Name}, "\n";
3178             print "Row 42, Column 3 is ", $sheet->[41][2], "\n";
3179              
3180             # Print the data.
3181             # Sheet objects, when used as an HASHref, act like \%crow
3182             printf "%20s %8s %8s %-13s %s\n", "Name","A/N","Income","Phone","Email";
3183             $sheet->apply( sub{
3184             printf "%20s %8d %8.2f %-13s%s\n",
3185             $sheet->{Name},
3186             $sheet->{"Account Number"},
3187             $sheet->{Income},
3188             $sheet->{Home_phone},
3189             $sheet->{Email} ;
3190             });
3191              
3192             # Another way:
3193             $sheet->apply( sub{
3194             my $r = $sheet->crow();
3195             printf "%20s %8d %8.2f %-13s%s\n",
3196             $r->{Name}, $r->{"Account Number"}, $r->{Income},
3197             $r->{Home_phone}, $r->{Email} ;
3198             });
3199              
3200             # Another way:
3201             $sheet->apply( sub{
3202             my $r = $sheet->crow();
3203             printf "%20s %8d %8.2f %-13s%s\n",
3204             $r->[0], $r->[1], $r->[4], $r->[3], $r->[2] ;
3205             });
3206              
3207             # Split the "Customer's Name" into separate FName and LName columns
3208             $sheet->insert_cols('>Name', "FName", "LName");
3209             $sheet->apply( sub {
3210             my $r = $sheet->crow();
3211             ($r->{FName}, $r->{LName}) = ($r->{Name} =~ /(.*) (.*)/)
3212             or die Spreadsheet::Edit::logmsg("Could not parse Name");
3213             });
3214             $sheet->delete_cols( "Name" );
3215              
3216              
3217             =head1 INTRODUCTION
3218              
3219             =over
3220              
3221             You may want to skip ahead to "LIST OF FUNCTIONS (and OO methods)".
3222              
3223             =back
3224              
3225             Columns may be referenced by title without knowing their positions.
3226             Optionally, global (package) variables may be tied to columns and used
3227             during C<apply()>.
3228              
3229             Data tables can come from Spreadsheets, CSV files, or your code.
3230              
3231             A table in memory (that is, a C<sheet> object) contains an array of rows,
3232             each of which is an array of cell values.
3233              
3234             Rows are overloaded to act as either arrays or hashes; when used as a hash,
3235             cells are accessed by name (e.g. column titles), or letter code ("A", "B" etc.)
3236              
3237             The usual paradigm is to iterate over rows applying a function
3238             to each, vaguely inspired by 'sed' and 'awk' (see C<apply> below).
3239             Random access is also supported.
3240              
3241             Note: Only cell I<values> are handled; there is no provision
3242             for processing formatting information from spreadsheets.
3243             The author has a notion to add support for formats,
3244             perhaps integrating with Spreadsheet::Read and Spreadsheet::Write
3245             or the packages they use. Please contact the author if you want to help.
3246              
3247             =head3 HOW TO IMPORT
3248              
3249             By default only functions are imported, but to fully use the functional API:
3250              
3251             use Spreadsheet::Edit ':all';
3252              
3253             which imports functions and helper variables (see STANDARD SHEET VARIABLES
3254             and VARIABLES USED DURING APPLY).
3255              
3256             You can rename imported items using the '-as' notation shown in
3257             L<Exporter::Tiny::Manual::QuickStart>.
3258              
3259             Purely-OO applications can L<use Spreadsheet::Edit ();>.
3260              
3261             =head1 THE 'CURRENT SHEET'
3262              
3263             I<Functions> and helper variables implicitly operate on a
3264             package-global "current sheet" object, which can be switched at will.
3265             OO I<Methods> operate on the C<sheet> object they are called on.
3266              
3267             Functions which operates on the "current sheet" have
3268             corresponding OO methods with the same names and arguments
3269             (note that method args must be enclosed by parenthesis).
3270              
3271             =head1 TIED COLUMN VARIABLES
3272              
3273             Package variables can refer directly to columns in the 'current sheet'
3274             during C<apply>. For example C<$Email> and C<$FName> in
3275             the SYNOPSIS above.
3276              
3277             See C<tie_column_vars> for details.
3278              
3279             =head1 LIST OF FUNCTIONS (and OO methods)
3280              
3281             In the following, {OPTIONS} refers to an optional first argument
3282             which, if present, is a hashref giving additional parameters.
3283             For example in
3284              
3285             read_spreadsheet {sheetname => 'Sheet1'}, '/path/to/file.xlsx';
3286              
3287             the {...} hash is optional and specifies the sheet name.
3288              
3289             =head2 read_spreadsheet CSVFILEPATH
3290              
3291             =head2 read_spreadsheet SPREADSHEETPATH
3292              
3293             =head2 read_spreadsheet "SPREADSHEETPATH!SHEETNAME"
3294              
3295             Replace any existing data with content from the given file.
3296              
3297             The Functional API will create a new sheet object if
3298             there is no "current sheet" (see also C<sheet> and C<new_sheet>);
3299              
3300             The file may be a .csv or any format supported by Libre Office or gnumeric.
3301              
3302             By default column titles are auto-detected and
3303             an exception is thrown if a plausible title row can not be found.
3304             {OPTIONS} may include:
3305              
3306             Auto-detection options:
3307              
3308             =over 2
3309              
3310             required => COLSPEC or [COLSPEC,...] # any required title(s)
3311             min_rx => NUM, # first rx which may contain the title row.
3312             max_rx => NUM, # maximum rx which may contain the title row.
3313             first_cx => NUM, # first column ix which must contain required titles
3314             last_cx => NUM, # last column ix which must contain required titles
3315              
3316             =back
3317              
3318             The first row is used which includes the C<required> title(s), if any,
3319             and has non-empty titles in all columns, or columns
3320             C<first_cx> through C<last_cx>.
3321              
3322             Or to specify the title row explicitly:
3323              
3324             =over 2
3325              
3326             title_rx => rx # title row index (first is 0)
3327             title_rx => undef # specify that there are no titles
3328              
3329             =back
3330              
3331             See also the C<title_rx> function/method.
3332              
3333             Other options:
3334              
3335             =over 6
3336              
3337             =item sheetname => SHEETNAME
3338              
3339             Specify which sheet in a multi-sheet workbook (i.e. spreadsheet file) to read.
3340             Alternatively, the sheet name may be appended to the input
3341             path after '!' as shown in the example.
3342              
3343             If no SHEETNAME is given then the sheet which was "active" when the
3344             workbook was saved will be retrieved.
3345              
3346             =item silent => bool
3347              
3348             =item verbose => bool
3349              
3350             =item debug => bool
3351              
3352             Probably what you expect.
3353              
3354             =item Other C<< key => value >> pairs override details of CSV parsing.
3355              
3356             See L<Text::CSV>. UTF-8 encoding is assumed by default.
3357              
3358             =back
3359              
3360             Due to bugs in Libre/Open Office, spreadsheet files can not
3361             be read if LO/OO is currently running, even
3362             for unrelated purposes (see "BUGS").
3363             This problem does not occur with .csv files
3364              
3365             =head2 alias IDENT => COLSPEC, ... ;
3366              
3367             =head2 alias IDENT => qr/regexp/, ... ;
3368              
3369             Create alternate identifiers for specified columns.
3370              
3371             Each IDENT, which must be a valid Perl identifier, will henceforth
3372             refer to the specified column even if the identifier is the same
3373             as the title or letter code of a different column.
3374              
3375             C<$row{IDENT}> and a tied variable C<$IDENT> will refer to the specified column.
3376              
3377             Aliases automatically track the column if it's position changes.
3378              
3379             Regular Expressions are matched against titles only, and an exception is
3380             thrown if more than one title matches.
3381              
3382             Otherwise, COLSPECs may be titles, existing alias names, column letters, etc.
3383             (see "COLUMN SPECIFIERS" for details).
3384              
3385             The COLSPEC is evaluated before the alias is created, so
3386              
3387             alias B => "B";
3388              
3389             would make "B" henceforth refer to the column with title "B", if one exists,
3390             or the second column (treating COLSPEC as a letter-code), even if the column
3391             is later moved.
3392              
3393             RETURNS: The 0-based column indices of the aliased column(s).
3394              
3395             =head2 unalias IDENT, ... ;
3396              
3397             Forget alias(es). Any masked COLSPECs become usable again.
3398              
3399             =head2 tie_column_vars VARNAME, ...
3400              
3401             Create tied package variables (scalars) for use during C<apply>.
3402              
3403             Each variable corresponds to a column, and reading or writing
3404             it accesses the corresponding cell in the row being visited during C<apply>.
3405              
3406             The '$' may be omitted in the VARNAME arguments to C<tie_column_vars>;
3407              
3408             You must separately declare these variables with C<our $NAME>,
3409             except in the special case described
3410             at "Use in BEGIN() or module import methods".
3411              
3412             The variable name itself implies the column it refers to.
3413              
3414             Variable names may be:
3415              
3416             =over
3417              
3418             =item * User-defined alias names (see "alias")
3419              
3420             =item * Titles which happen to be valid Perl identifiers
3421              
3422             =item * Identifiers derived from titles by replacing offending characters
3423             with underscrores (see "AUTOMATIC ALIASES"),
3424              
3425             =item * Spreadsheet column letters like "A", "B" etc.
3426              
3427             =back
3428              
3429             See "CONFLICT RESOLUTION" for how ambiguity is resolved.
3430              
3431             Multiple calls accumulate, including with different sheets.
3432              
3433             Variable bindings are dynamically evaluated during each access by using the
3434             variable's identifier as a COLSPEC with the 'current sheet' in your package.
3435             This means that it does not matter which sheet
3436             was 'current' when C<tie_column_vars> was called with a particular name;
3437             it only matters that the name of a tied variable is a valid COLSPEC in
3438             the 'current sheet' when that variable is referenced
3439             (otherwise a read returns I<undef> and a write throws an exception).
3440             [*Need clarification*]
3441              
3442             B<{OPTIONS}> may specify:
3443              
3444             =over
3445              
3446             =item package => "pkgname"
3447              
3448             Tie variables in the specified package instead of the caller's package.
3449              
3450             =item verbose => bool
3451              
3452             =item debug => bool
3453              
3454             Print trace messages.
3455              
3456             =back
3457              
3458             =head2 tie_column_vars ':all'
3459              
3460             With the B<:all> token I<all possible variables> are tied, corresponding
3461             to the aliases, titles, non-conflicting column letters etc. which exist
3462             for the current sheet.
3463              
3464             In addition, variables will be tied in the future I<whenever new identifiers
3465             become valid> (for example when a new C<alias> is created, column added,
3466             or another file is read into the same sheet).
3467              
3468             Although convenient this is B<insecure> because malicious
3469             titles could clobber unintended globals.
3470              
3471             If VARNAMES are also specified, those variables will be tied
3472             immediately even if not yet usable; an exception occurs if a tied variable
3473             is referenced before the corresponding alias or title exists.
3474             [*Need clarification* -- exception even for reads??]
3475              
3476             =head2 Use in BEGIN{} or module import methods
3477              
3478             C<tie_column_vars> B<imports> the tied variables into your module,
3479             or the module specified with package => "pkgname" in {OPTIONS}.
3480              
3481             It is unnecessary to declare tied variables if the import
3482             occurs before code is compiled which references the variables. This can
3483             be the case if C<tie_column_vars> is called in a BEGIN{} block or in the
3484             C<import> method of a module loaded with C<use>.
3485              
3486             L<Spreadsheet::Edit::Preload> makes use of this.
3487              
3488             =head2 $rowindex = title_rx ;
3489              
3490             =head2 title_rx ROWINDEX ;
3491              
3492             =head2 title_rx undef ;
3493              
3494             Get or set the title row index. When setting, titles in that row are
3495             immediately (re-)examined and the corresponding COLSPECs become valid,
3496             e.g. you can reference a column by it's title or a derived identifier.
3497              
3498             Note: Setting C<title_rx> this way is rarely needed because
3499             by default C<read_spreadsheet> sets the title row.
3500              
3501             Setting to C<undef> disables titles; any
3502             existing COLSPECs derived from titles are invalidated.
3503              
3504             =head2 title_rx {AUTODETECT_OPTIONS} 'auto';
3505              
3506             Immediately perform (or repeat) auto-detection of the title row.
3507             See C<read_spreadsheet> for a description of the options.
3508              
3509             =head2 apply {code} [COLSPEC*] ;
3510              
3511             =head2 apply_all {code} [COLSPEC*] ;
3512              
3513             =head2 apply_torx {code} RX-OR-RXLIST [,COLSPEC*] ;
3514              
3515             =head2 apply_exceptrx {code} RX-OR-RXLIST [,COLSPEC*] ;
3516              
3517             Execute the specified code block (or referenced sub) once for each row.
3518              
3519             Note that there is no comma after a bare {code} block.
3520              
3521             While executing your code, tied column variables and
3522             the sheet variables C<@crow>, C<%crow>, C<$rx> and C<$linenum>
3523             and corresponding OO methods will refer to the row being visited.
3524              
3525             If a list of COLSPECs is specified, then
3526              
3527             =over 2
3528              
3529             @_ is bound to the columns in the order specified
3530             $_ is bound to the first such column
3531              
3532             =back
3533              
3534             C<apply> normally visits all rows which follow the title row, or all rows
3535             if there is no title row.
3536              
3537             C<apply_all> unconditionally visits every row, including any title row.
3538              
3539             C<apply_torx> or C<apply_exceptrx> visit exactly the indicated rows.
3540             RX-OR-RXLIST may be either a single row index or a [list of rx];
3541              
3542             Rows may be safely inserted or deleted during 'apply';
3543             rows inserted after the currently-being-visited row will be visited
3544             at the proper time.
3545              
3546             Nested and recursive C<apply>s are allowed.
3547             When an 'apply' changes the 'current sheet',
3548             tied variables then refer to the other sheet and
3549             any C<apply> active for that sheet.
3550             With nested 'apply's, take care to restore the original sheet
3551             before returning (C<Guard::scope_guard> is useful).
3552              
3553             B<MAGIC VARIABLES USED DURING APPLY>
3554              
3555             These variables refer to the row currently being visited:
3556              
3557             =over 2
3558              
3559             B<@crow> is an array aliased to the current row's cells.
3560              
3561             B<%crow> is a hash aliased to the same cells,
3562             indexed by alias, title, letter code, etc. (any COLSPEC).
3563              
3564             B<$rx> is the 0-based index of the current row.
3565              
3566             B<$linenum> is the starting line number of the current row if the
3567             data came from a .csv file.
3568              
3569             For example, the "Account Number" column in the SYNOPSIS may be accessed
3570             many ways:
3571              
3572             alias AcNum => "Account Number";
3573             apply {
3574              
3575             $crow{"Account Number"} # %crow indexed by title
3576             $crow{AcNum} # using an explicit alias
3577             $crow{Account_Number} # using the AUTOMATIC ALIAS
3578              
3579             $crow[ $colx{"Account Number"} ]; # @crow indexed by a 0-based index
3580             $crow[ $colx{"AcNum"} ]; # ...obtained from %colx
3581             $crow[ $colx{"Account_Number"} ]; #
3582              
3583             $rows[$rx]->[ $colx{Account_Number} ] # Directly accessing @rows
3584              
3585             # See "TIED COLUMN VARIABLES" for a sweeter alternative
3586             };
3587              
3588             =back
3589              
3590             =head2 delete_col COLSPEC ;
3591              
3592             =head2 delete_cols COLSPEC+ ;
3593              
3594             The indicated columns are removed. Remaining title bindings
3595             are adjusted to track shifted columns.
3596              
3597             =head2 only_cols COLSPEC+ ;
3598              
3599             All columns I<except> the specified columns are deleted.
3600              
3601             =head2 move_col POSITION, SOURCE ;
3602              
3603             =head2 move_cols POSITION, SOURCES... ;
3604              
3605             Relocate the indicated column(s) (C<SOURCES>) so they are adjacent, in
3606             the order specified, starting at the position C<POSITION>.
3607              
3608             POSITION may be ">COLSPEC" to place moved column(s)
3609             immediately after the indicated column (">$" to place at the end),
3610             or POSITION may directly specify the destination column
3611             using an unadorned COLSPEC.
3612              
3613             A non-absolute COLSPEC indicates the I<initial> position of the referenced column.
3614              
3615             =head2 insert_col POSITION, newtitle ;
3616              
3617             =head2 insert_cols POSITION, newtitles... ;
3618              
3619             One or more columns are created starting at a position
3620             specified the same way as in C<move_cols> (later columns
3621             are moved rightward).
3622              
3623             POSITION may be ">$" to place new column(s) at the far right.
3624              
3625             A new title must be specified for each new column.
3626             If there is no title row, specify C<undef> for each position.
3627              
3628             Returns the new column index or indices.
3629              
3630             =head2 split_col {code} COLSPEC, POSITION, newtitles... ;
3631              
3632             New columns are created starting at POSITION as with C<insert_cols>,
3633             and populated with data from column COLSPEC.
3634              
3635             C<{code}> is called for each row with $_ bound to the cell at COLSPEC
3636             and @_ bound to cell(s) in the new column(s). It is up to your code to
3637             read the old column ($_) and write into the new columns (@_).
3638              
3639             The old column is left as-is (not deleted).
3640              
3641             If there is no title row, specify C<undef> for each new title.
3642              
3643             =head2 sort_rows {rx cmp function}
3644              
3645             =head2 sort_rows {rx cmp function} $first_rx, $last_rx
3646              
3647             If no range is specified, then the range is the
3648             same as for C<apply> (namely: All rows after the title row).
3649              
3650             In the comparison function, globals $a and $b will contain row objects, which
3651             are dual-typed to act as either an array or hash ref to the cells
3652             in their row. The corresponding original row indicies are also passed
3653             as parameters in C<@_>.
3654              
3655             Rows are not actually moved until after all comparisons have finished.
3656              
3657             RETURNS: A list of the previous row indicies of all rows in the sheet.
3658              
3659             # Sort on the "LName" column using row indicies
3660             # (contrast with the example in SYNOPSIS which uses $a and $b)
3661             sort_rows { my ($rxa, $rxb) = @_;
3662             $rows[$rxa]{LName} cmp $rows[$rxb]{LName}
3663             };
3664              
3665             =head2 rename_cols COLSPEC, "new title", ... ;
3666              
3667             Multiple pairs may be given. Title cell(s) are updated as indicated.
3668              
3669             Existing user-defined aliases are I<not> affected, i.e.,
3670             they continue to refer to the same columns as before even if the titles changed.
3671              
3672             =head2 join_cols_sep STRING COLSPEC+ ;
3673              
3674             =head2 join_cols {code} COLSPEC+ ;
3675              
3676             The specified columns are combined into the first-specified column and the other
3677             columns are deleted.
3678              
3679             The first argument of C<join_cols_sep> should be a fixed separator.
3680             The first argument of C<join_cols> may be a {code} block or subref;
3681              
3682             If a separator string is specified it is used to join column content
3683             together.
3684              
3685             If a {code} block or sub ref is specified,
3686             it is executed once for each row following the title row,
3687             with $_ bound to the first-named column, i.e. the surviving column,
3688             and @_ bound to all named columns in the order given.
3689              
3690             It is up to your code to combine the data by reading
3691             @_ and writing $_ (or, equivalently, by writing $_[0]).
3692              
3693             =head2 reverse_cols
3694              
3695             The order of the columns is reversed.
3696              
3697             =head2 insert_row
3698              
3699             =head2 insert_row 'END' [,$count]
3700              
3701             =head2 insert_rows $rowx [,$count]
3702              
3703             Insert one or more empty rows at the indicated position
3704             (default: at end). C<$rowx>, if specified, is either a 0-based offset
3705             for the new row or 'END' to add the new row(s) at the end.
3706             Returns the index of the first new row.
3707              
3708             =head2 delete_rows $rowx,... ;
3709              
3710             =head2 delete_rows 'LAST',... ;
3711              
3712             The indicated data rows are deleted. C<$rowx> is a zero-based row index
3713             or the special token "LAST" to indicate the last row (same as C<$#rows>).
3714             Any number of rows may be deleted in a single command, listed individually.
3715              
3716             =for Pod::Coverage delete_row
3717              
3718             =head2 transpose
3719              
3720             Invert the relation, i.e. rotate and flip the table.
3721             Cells A1,B1,C1 etc. become A1,A2,A3 etc.
3722             Any title_rx is forgotten.
3723              
3724             =for future =head2 $href = read_workbook SPREADSHEETPATH
3725             =for future
3726             =for future **NOT YET IMPLEMENTED**
3727             =for future
3728             =for future [Function only, not callable as a method]
3729             =for future All sheets in the specified document are read into memory
3730             =for future without changing the 'current sheet'. A hashref is returned:
3731             =for future
3732             =for future {
3733             =for future "sheet name" => (Spreadsheet::Edit object),
3734             =for future ...for each sheet in the workbook...
3735             =for future }
3736             =for future
3737             =for future To access one of the workbook sheets, execute
3738             =for future
3739             =for future sheet $href->{"sheet name"}; # or call OO methods on it
3740             =for future
3741             =for future If SPREADSHEETPATH was a .csv file then the resulting hash will have only
3742             =for future one member with an indeterminate key.
3743              
3744             =head2 write_csv *FILEHANDLE
3745              
3746             =head2 write_csv $path
3747              
3748             Write the current data to the indicated path or open file handle as
3749             a CSV text file.
3750             The default encoding is UTF-8 or, if C<read_spreadsheet> was most-recently
3751             used to read a csv file, the encoding used then.
3752              
3753             {OPTIONS} may include
3754              
3755             =over 6
3756              
3757             =item options for Text::CSV
3758              
3759             Usually none need be specified because we supply sane defaults.
3760              
3761             =item silent => bool
3762              
3763             =item verbose => bool
3764              
3765             =item debug => bool
3766              
3767             =back
3768              
3769             =head2 write_spreadsheet OUTPUTPATH
3770              
3771             Write the current data to a spreadsheet (.ods, .xlsx, etc.) by
3772             first writing to a temporary CSV file and then importing that file into
3773             a new spreadsheet.
3774              
3775             {OPTIONS} may include
3776              
3777             =over 6
3778              
3779             =item col_formats => [ LIST ]
3780              
3781             REQUIRED OPTION.
3782             EXPERIMENTAL, likely to change when Spreadsheet::Read is integrated!
3783              
3784             Elements of LIST may be "" (Standard), "Text", "MM/DD/YY", "DD/MM/YY", or
3785             "YY/MM/DD" to indicate the format of the corresponding column. The meaning
3786             of "Standard" is not well documented but appears to mean "General Number"
3787             in most cases. For details, see "Format Codes" in L<this old Open Office
3788             documentation|https://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options#Filter_Options_for_the_CSV_Filter>.
3789              
3790             =item silent => bool
3791              
3792             =item verbose => bool
3793              
3794             =item debug => bool
3795              
3796             =back
3797              
3798              
3799             =head2 options NAME => EXPR, ... ;
3800              
3801             =head2 options NAME ;
3802              
3803             Set or retrieve miscellaneous sheet-specific options.
3804             When setting, the previous value of
3805             the last option specified is returned. The only options currently defined
3806             are I<silent>, I<verbose> and I<debug>.
3807              
3808             =head2 $hash = attributes ;
3809              
3810             Returns a reference to a hash in which you may store arbitrary data
3811             associated with the sheet object.
3812              
3813             =head2 spectocx COLSPEC or qr/regexp/, ... ;
3814              
3815             Returns the 0-based indicies of the specified colomn(s).
3816             Throws an exception if there is no such column.
3817             A regexp may match multiple columns.
3818             See also C<%colx>.
3819              
3820             =head2 new_sheet
3821              
3822             [functional API only]
3823             Create a new empty sheet and make it the 'current sheet', returning the
3824             sheet object.
3825              
3826             Rarely used because a new sheet is automatically created by
3827             C<read_spreadsheet> if your package has no current sheet.
3828              
3829             {OPTIONS} may include:
3830              
3831             =over 6
3832              
3833             =item data_source => "text..."
3834              
3835             This string will be returned by the C<data_source> method,
3836             overriding any default.
3837              
3838             =item rows => [[A1_value,B1_value,...], [A2_value,B2_value,...], ...],
3839              
3840             =item linenums => [...] #optional
3841              
3842             This makes the C<sheet> object hold data already in memory.
3843             The data should not be modified directly while the sheet C<object> exists.
3844              
3845             =item clone => $existing_sheet
3846              
3847             A deep copy of an existing sheet is made.
3848              
3849             =item num_cols => $number # with no initial content
3850              
3851             An empty sheet is created but with a fixed number of columns.
3852             When rows are later created they will be immediately padded with empty cells
3853             if necessary to this width.
3854              
3855             =back
3856              
3857             =head2 $curr_sheet = sheet ;
3858              
3859             =head2 $prev_sheet = sheet $another_sheet ;
3860              
3861             =head2 $prev_sheet = sheet undef ;
3862              
3863             [Functional API only]
3864             Retrieve, change, or forget the 'current sheet' object used
3865             by the functional API.
3866              
3867             Changing the current sheet immediately changes what is referenced by
3868             tied column variables and STANDARD SHEET VARIABLES (described later).
3869              
3870             {OPTIONS} may specify C<< package => 'pkgname' >> to operate on the specified
3871             package instead of the caller's package.
3872              
3873             Note: See C<Spreasheet::Edit-E<gt>new> if using the OO API.
3874              
3875             =head1 STANDARD SHEET VARIABLES
3876              
3877             These variables magically access the 'current sheet' in your package.
3878              
3879             =over
3880              
3881             =item @rows
3882              
3883             The spreadsheet data as an array of row objects.
3884              
3885             Each row object is "dual-typed" (overloaded) to act as either an ARRAY or HASH
3886             reference to the cells in that row.
3887              
3888             When used as a HASH ref, the key may be a
3889             alias, column title, letter-code etc. (any COLSPEC).
3890             When used as an ARRAY ref, the 0-based index specifies the column.
3891              
3892             =item @linenums
3893              
3894             The first line numbers of corresponding rows (a row can contain
3895             multiple lines if cells contain embedded newlines). Valid only if
3896             the data came from a CSV file.
3897              
3898             =item $num_cols
3899              
3900             The number of columns in the widest input row. Shorter rows are
3901             padded with empty cells when read so that all rows have the same number
3902             of columns in memory.
3903              
3904             =item $title_rx and $title_row
3905              
3906             C<$title_rx> contains the 0-based row index of the title row
3907             and C<$title_row> is an alias for C<$rows[ $title_rx ]>.
3908              
3909             The title row is auto-detected by default.
3910             See C<read_spreadsheet> and C<title_rx> for how to control this.
3911              
3912             If a column title is modified, set C<$title_rx = undef;> to force re-detection.
3913              
3914             =item %colx (column key => column index)
3915              
3916             C<< %colx >> maps aliases, titles, etc. (all currently-valid COLSPECs)
3917             to the corresponding zero-based column indicies. See "COLSPECS" .
3918              
3919             =item %colx_desc (column key => "debugging info")
3920              
3921             =back
3922              
3923             =head1 COLSPECs (COLUMN SPECIFIERS)
3924              
3925             Arguments which specify columns may be:
3926              
3927             =over
3928              
3929             =item (1) a user-defined alias identifier
3930              
3931             =item (2) an actual "column title" **
3932              
3933             =item (3) an actual title with any leading & trailing spaces removed *
3934              
3935             =item (4) an AUTOMATIC ALIAS identifier *
3936              
3937              
3938             =item (6) a Regexp (qr/.../) which matches an actual title
3939              
3940             =item (7) a numeric column index (0-based)
3941              
3942             =item (8) '^' or '$' (means first or last column, respectively)
3943              
3944              
3945             =back
3946              
3947             *These may only be used if they do not conflict with an
3948             item listed higher up.
3949              
3950             **Titles may be used directly if they can not be confused with
3951             a user-defined alias, the special names '^' or '$' or a numeric
3952             column index. See "CONFLICT RESOLUTION".
3953              
3954             B<AUTOMATIC ALIASES> are Perl I<identifiers> derived from column titles by
3955             first removing leading or trailing spaces, and then
3956             replacing non-word characters with underscores and prepending
3957             an underscore if necessary.
3958             For example:
3959              
3960             Title Automatic Alias
3961              
3962             "Address" Address (no change needed)
3963             " First Name " First_Name
3964             "First & Last" First___Last
3965             "+sizes" _sizes
3966             "1000s" _1000s (underscore avoids leading digit)
3967              
3968             Aliases (both automatic and user-defined) are valid identifiers,
3969             so can be used as the names of tied variables,
3970             bareword keys to C<%colx> and C<%crow>, and related OO interfaces,
3971              
3972             CONFLICT RESOLUTION
3973              
3974             A conflict occurs when a column key potentially refers to multiple
3975             columns. For example, "A", "B" etc. are standard column
3976             names, but they might also be the actual titles of other columns.
3977             Warnings are printed about conflicts unless the B<silent> option
3978             is true (see C<options>).
3979              
3980             =over
3981              
3982             B<User alias identifiers> (defined using C<alias>) are always valid.
3983              
3984             '^' and '$' always refer to the first and last column.
3985              
3986             Numeric "names" 0, 1, etc. always give a 0-based column index
3987             if the value is between 0 and num_cols (i.e. one past the end).
3988              
3989             B<Actual Titles> refer to to their columns, except if they:
3990              
3991             =over
3992              
3993             are the same as a user-defined alias
3994              
3995             are '^' or '$'
3996              
3997             consist only of digits (without leading 0s) corresponding
3998             to a valid column index.
3999              
4000             =back
4001              
4002             B<Automatic Aliases> and B<Standard column names> ("A", "B", etc.)
4003             are available as column keys
4004             unless they conflict with a user-defined alias or an actual title.
4005              
4006             =back
4007              
4008             Note: To unconditionally refer to numeric titles or titles which
4009             look like '^' or '$', use a Regexp B<qr/.../>.
4010             Automatic Aliases can also refer to such titles if there are no conflicts.
4011              
4012             Column positions always refer to the data before a command is
4013             executed. This is relevant for commands which re-number or delete columns.
4014              
4015             =head1 OO DESCRIPTION (OBJECT-ORIENTED INTERFACE)
4016              
4017             All the Functions listed above (except for C<new_sheet> and C<sheet>) have
4018             corresponding methods with the same arguments.
4019              
4020             However method arguments must be enclosed in parenthesis;
4021             Bare {code} blocks may not be used, so a sub{...} ref
4022             should be passed to C<apply>, etc.
4023              
4024             =head1 OO-SPECIFIC METHODS
4025              
4026             =head2 Spreadsheet::Edit->new(OPTIONS...)
4027              
4028             Creates a new "sheet" object.
4029              
4030             OPTIONS are the same as described for the C<new_sheet> Function above,
4031             except that they may be specified as key => value pairs of arguments
4032             instead of (or in addition to) an {OPTIONS} hashref.
4033              
4034             =head2 $sheet->rows() ; # Analogous to to \@rows
4035              
4036             =head2 $sheet->linenums() ; # Analogous to \@linenums
4037              
4038             =head2 $sheet->num_cols() ; # Analogous to $num_cols
4039              
4040             =head2 $sheet->colx() ; # Analogous to \%colx
4041              
4042             =head2 $sheet->colx_desc() ; # Analogous to \%colx_desc
4043              
4044             =head2 $sheet->title_rx() ; # Analogous to to $title_rx
4045              
4046             =head2 $sheet->title_row() ; # Analogous to $title_row
4047              
4048             =head2 $sheet->rx() ; # Current rx in apply, analogous to to $rx
4049              
4050             =head2 $sheet->title_rx(rxvalue) ; # Analogous to assigning to $title_rx (changes title row)
4051              
4052             =head2 $sheet->crow(); # Current row in apply (a dual-typed row object)
4053              
4054             =head2 $sheet->linenum() ; # Analogous to to $linenum
4055              
4056             =head2 $sheet->get(rx,ident) ; # Analogous to to $rows[rx]{ident}
4057              
4058             =head2 $sheet->set(rx,ident,value); # Analogous to to $rows[rx]{ident} = value
4059              
4060             =head2 $sheet->data_source(); # Returns "description of sheet" (e.g. path read)
4061              
4062             =head2 $sheet->sheetname(); # valid if input was a spreadsheet, else undef
4063              
4064             =head2
4065              
4066             =head1 UTILITY
4067              
4068             =head2 logmsg [FOCUSARG,] string, string, ...
4069              
4070             (must be explicitly imported)
4071              
4072             Concatenate strings, prefixed by a description
4073             of the 'current sheet' and row during C<apply>, if any (or with the
4074             sheet and/or row given by FOCUSARG).
4075              
4076             The resulting string is returned, with "\n" appended if it was not
4077             already terminated by a newline.
4078              
4079             The first argument is used as FOCUSARG if it is
4080             a sheet object, [sheet_object], or [sheet_object, rowindex], and specifies
4081             the sheet and/or row to describe in the message prefix.
4082             Otherwise the first argument is not special and is simply
4083             the first message string.
4084              
4085             The details of formatting the sheet may be customized with a call-back
4086             given by a C<{logmsg_pfx_gen}> attribute. See comments
4087             in the source for how this works.
4088              
4089              
4090              
4091             =head1 SEE ALSO
4092              
4093             L<Spreadsheet::Edit::Preload>
4094              
4095             =head1 BUGS
4096              
4097             Some vestigial support for formats remains from an earlier implementation,
4098             but this support is likely to be entirely replaced at some point.
4099              
4100             Reading a spreadsheet (but not csv) may fail if I<Libre Office>
4101             or I<Open Office> are currently running for any purpose; this seems to be
4102             a bug or limitation where batch-mode operations share the same profile as
4103             interactive sessions. In any case, I<ssconvert> (gnumeric) will be used
4104             if it is installed, and does not have this limitation.
4105             In the future Spreadsheet::Read might be used instead of external programs,
4106             although it uses Twig and is quite a bit slower.
4107              
4108             =head1 THREAD SAFETY
4109              
4110             Unknown, and probably not worth the trouble to find out.
4111              
4112             =head1 FUTURE IDEAS
4113              
4114             =over 4
4115              
4116             =item Add format-processing support.
4117              
4118             =item Add "column-major" views.
4119              
4120             This would allow accessing a whole column as an array.
4121             Perhaps C<@cols> and C<%cols> would be sets of column arrays
4122             (@cols indexed by column index, %cols indexed by any COLSPEC).
4123             And C<tie_column_vars '@NAME'> would tie user array variables to columns.
4124              
4125             =back
4126              
4127             =head1 AUTHOR
4128              
4129             Jim Avera (jim.avera at gmail)
4130              
4131             =head1 LICENSE
4132              
4133             Public Domain or CC0.
4134              
4135             =for Pod::Coverage meta_info
4136              
4137             =for Pod::Coverage iolayers input_encoding
4138              
4139             =for Pod::Coverage oops btw fmt_sheet fmt_list
4140              
4141             =for Pod::Coverage to_aref to_array to_wanted to_hash
4142              
4143             =for Pod::Coverage tied_varnames title2ident let2cx cx2let
4144              
4145             =cut
4146