File Coverage

blib/lib/Spreadsheet/Edit.pm
Criterion Covered Total %
statement 701 1541 45.4
branch 187 694 26.9
condition 75 365 20.5
subroutine 135 226 59.7
pod 50 63 79.3
total 1148 2889 39.7


line stmt bran cond sub pod time code
1             # License: Public Domain or CC0
2             # See https://creativecommons.org/publicdomain/zero/1.0/
3             # The author, Jim Avera (jim.avera at gmail) has waived all copyright and
4             # related or neighboring rights. Attribution is requested but is not required.
5              
6             # Pod documentation is below (use perldoc to view)
7              
8 3     3   3458641 use 5.18.0; # lexical subs with bug we have worked around
  3         33  
9 3     3   18 use strict; use warnings FATAL => 'all'; use utf8;
  3     3   6  
  3     3   86  
  3         31  
  3         6  
  3         122  
  3         16  
  3         6  
  3         66  
10 3     3   100 no warnings qw(experimental::lexical_subs);
  3         6  
  3         127  
11 3     3   34 use feature qw(state lexical_subs current_sub);
  3         14  
  3         387  
12              
13             package Spreadsheet::Edit;
14              
15             # Allow "use <thismodule> <someversion>;" in development sandbox to not bomb
16 3     3   20 { no strict 'refs'; ${__PACKAGE__."::VER"."SION"} = 1999.999; }
  3         7  
  3         496  
17              
18             our $VERSION = '1000.008'; # VERSION from Dist::Zilla::Plugin::OurPkgVersion
19             our $DATE = '2023-09-19'; # DATE from Dist::Zilla::Plugin::OurDate
20              
21             # FIXME: cmd_nesting does nothing except prefix >s to log messages.
22             # Shouldn't it skip that many "public" call frames???
23              
24             # TODO FIXME: Integrate with Spreadsheet::Read and provide a formatting API
25              
26             # TODO: Allow & support undef cell values (see Text::CSV_XS), used to
27             # represent "NULL" when interfacing with database systems.
28             # OTOH, this conflicts with failed optional alias keys
29              
30             # TODO: Add some way to exit an apply() early, e.g. return value?
31             # or maybe provide an abort_apply(resultval) function
32             # which throws an exception we can catch in apply?
33              
34             # TODO: Use Tie::File to avoid storing entire sheet in memory
35             # (requires seekable, so must depend on OpenAsCsv
36             # copying "-" to a temp file if it is not a plain file).
37              
38             #########################################
39             # sub NAMING CONVENTION:
40             # sub public_method_or_function
41             # sub _internal_method
42             # sub __internal_function
43             #########################################
44              
45             # If these globals are *defined* then the corresponding
46             # (downcased) options default accordingly when new sheets are created.
47             our ($Debug, $Verbose, $Silent);
48              
49             ############################## Exporting stuff ############################
50             #
51 3     3   1599 use Exporter::Tiny 1.001_000 (); # just to require version with non-sub generator support
  3         15810  
  3         104  
52 3     3   23 use parent "Exporter::Tiny"; # make us be a derived class
  3         7  
  3         35  
53              
54             require mro; # makes next::can available
55              
56             sub import {
57             # copied from List::Util
58             # (RT88848) Touch the caller's $a and $b, to avoid the warning of
59             # 'Name "main::a" used only once: possible typo'
60 5     5   544 my $pkg = caller;
61 3     3   323 no strict 'refs';
  3         7  
  3         507  
62 5         10 ${"${pkg}::a"} = ${"${pkg}::a"};
  5         20  
  5         30  
63 5         9 ${"${pkg}::b"} = ${"${pkg}::b"};
  5         12  
  5         18  
64              
65 5         11 my $this = $_[0];
66 5         9 goto &{ $this->next::can }; # see 'perldoc mro'
  5         47  
67             #goto &maybe::next::method ; ???
68             }
69              
70 3     3   23 use Symbol qw/gensym/;
  3         7  
  3         4856  
71              
72             our @EXPORT = qw(
73             alias apply apply_all apply_exceptrx apply_torx attributes
74             spectocx data_source delete_col delete_cols delete_row delete_rows
75             first_data_rx transpose join_cols join_cols_sep last_data_rx move_col
76             move_cols insert_col insert_cols insert_row insert_rows new_sheet only_cols
77             options read_spreadsheet rename_cols reverse_cols
78             sheet sheetname sort_rows split_col tie_column_vars title2ident title_row
79             title_rx unalias write_csv write_spreadsheet );
80              
81             my @stdvars = qw( $title_rx $first_data_rx $last_data_rx $num_cols
82             @rows @linenums @meta_info %colx %colx_desc $title_row
83             $rx $linenum @crow %crow );
84              
85             our @EXPORT_OK = (@stdvars, qw/logmsg cx2let let2cx fmt_sheet/);
86              
87             our %EXPORT_TAGS = (
88             STDVARS => [@stdvars],
89             FUNCS => [@EXPORT],
90             default => [':FUNCS'],
91             DEFAULT => [':default'],
92             #all => [qw(:STDVARS :FUNCS cx2let let2cx logmsg)],
93             all => [qw(:STDVARS :FUNCS)],
94             );
95              
96             # Although the magic globals $row, $rx and friends are "imported" by 'use',
97             # they can not simply alias variables in Spreadsheet::Edit becuase they
98             # need to be tied with parameters specific to the user's package
99             # so that each package has its own 'current sheet'.
100             #
101             # To accomplish this, we use Exporter:Tiny's "generator" mechanism to
102             # create a new, unique variables tied appropriately for each import.
103             # See perldoc Exporter::Tiny::Manual::QuickStart .
104             #
105             # If "methods" named _generate_SUBNAME, _generateScalar_SCALARNAME,
106             # _generateArray_ARRAYNAME or _generateHash_HASHNAME exist in the
107             # exporting package (that's us), then they are called to obtain
108             # a ref to an object to export as SUBNAME, SCALARNAME, ARRAYNAME or HASHNAME.
109             #
110             # For example _generateScalar_rx() is called to get a ref to an $rx variable.
111              
112             sub __gen_x {
113 36     36   100 my ($myclass, $sigilname, $args, $globals, # supplied by Exporter::Tiny
114             $Type, $helpersub, @args) = @_;
115 36 100       96 my $sigl = $Type eq 'Hash' ? '%' : $Type eq 'Array' ? '@' : '$';
    100          
116 36         65 my $tieclassname = 'Tie::Indirect::'.$Type;
117 36         51 my $ref = *{gensym()}{uc($Type)}; # e.g. "ARARY"
  36         111  
118             # e.g. tie $$ref, 'Tie::Indirect::Scalar', \&_scal_tiehelper, ...
119 36         3752 eval "tie ${sigl}\$ref, \$tieclassname, \$helpersub,
120             \$globals->{into}, \$sigilname, \@args";
121 36 50       717 die $@ if $@;
122 36         232 $ref
123             }
124             sub __gen_scalar {
125 12     12   31 my ($myclass, $sigilname, $args, $globals,
126             $canon_ident, $onlyinapply) = @_;
127 12         35 __gen_x($myclass, $sigilname, $args, $globals,
128             "Scalar", \&Spreadsheet::Edit::_scal_tiehelper,
129             $canon_ident, $onlyinapply);
130             }
131 3     3   276 sub _generateScalar_num_cols { __gen_scalar(@_, "num_cols") }
132 3     3   301 sub _generateScalar_first_data_rx { __gen_scalar(@_, "first_data_rx") }
133 3     3   275 sub _generateScalar_last_data_rx { __gen_scalar(@_, "last_data_rx") }
134 3     3   261 sub _generateScalar_rx { __gen_scalar(@_, "current_rx", 1) }
135              
136             #sub _generateScalar_title_rx { __gen_scalar(@_, "title_rx") }
137             sub _generateScalar_title_rx {
138             __gen_x(@_, "Scalar",
139             sub{
140 0     0   0 my ($mutating, $pkg, $uvar, $onlyinapply)=@_;
141 0         0 my $sheet = &Spreadsheet::Edit::__getsheet(@_);
142             \$$sheet->{title_rx}
143 0     3   0 }, 0 # onlyinapply
  3         5515  
144             )
145             }
146              
147             sub __gen_aryelem {
148 6     6   22 my ($myclass, $sigilname, $args, $globals,
149             $index_ident, $array_ident, $onlyinapply) = @_;
150             # N.B. _aryelem_tiehelper has special logic for 'current_rx' and 'title_rx'
151 6         19 __gen_x($myclass, $sigilname, $args, $globals,
152             "Scalar", \&Spreadsheet::Edit::_aryelem_tiehelper,
153             $index_ident, $array_ident, $onlyinapply);
154             }
155 3     3   264 sub _generateScalar_title_row { __gen_aryelem(@_, "title_rx", "rows") }
156 3     3   281 sub _generateScalar_linenum { __gen_aryelem(@_, "current_rx", "linenums", 1) }
157              
158             sub __gen_hash {
159 6     6   22 my ($myclass, $sigilname, $args, $globals,
160             $field_ident, $onlyinapply) = @_;
161 6         17 __gen_x($myclass, $sigilname, $args, $globals,
162             "Hash", \&Spreadsheet::Edit::_refval_tiehelper,
163             $field_ident, $onlyinapply, 0); # mutable => 0
164             }
165 3     3   276 sub _generateHash_colx { __gen_hash(@_, "colx", 0) }
166 3     3   270 sub _generateHash_colx_desc { __gen_hash(@_, "colx_desc", 0) }
167              
168             sub __gen_array{
169 9     9   26 my ($myclass, $sigilname, $args, $globals,
170             $field_ident, $onlyinapply, $mutable) = @_;
171 9         28 __gen_x($myclass, $sigilname, $args, $globals,
172             "Array", \&Spreadsheet::Edit::_refval_tiehelper,
173             $field_ident, $onlyinapply, $mutable);
174             }
175 3     3   271 sub _generateArray_rows { __gen_array(@_, "rows", 0, 1) }
176              
177             # Currently @linenums is not mutable but maybe it should be?
178 3     3   273 sub _generateArray_linenums { __gen_array(@_, "linenums", 0) }
179              
180             ## FIXME: is meta_info still valid?
181 3     3   319 sub _generateArray_meta_info { __gen_array(@_, "meta_info", 0) }
182              
183             sub __get_currentrow {
184 0     0   0 my ($mutating, $pkg, $uvar) = @_;
185             # Ignore mutating, as it applies to the element not the container
186 0         0 my $sheet = &Spreadsheet::Edit::__getsheet(0, $pkg, $uvar, 1);
187             # Returns the dual-typed ref (Magicrow) for the current row
188 0         0 $$sheet->{rows}->[$$sheet->{current_rx}]
189             }
190             sub _generateArray_crow { # @crow aliases the current row during apply
191 3     3   273 my ($myclass, $sigilname, $args, $globals) = @_;
192 3         5 my $aref = *{gensym()}{ARRAY};
  3         12  
193             tie @$aref, 'Tie::Indirect::Array',
194 3         56 \&__get_currentrow, $globals->{into}, $sigilname ;
195 3         34 $aref
196             }
197             sub _generateHash_crow { # %crow indexes cells in the current row during apply
198 3     3   251 my ($myclass, $sigilname, $args, $globals) = @_;
199 3         5 my $href = *{gensym()}{HASH};
  3         10  
200             tie %$href, 'Tie::Indirect::Hash',
201 3         61 \&__get_currentrow, $globals->{into}, $sigilname ;
202 3         89 $href
203             }
204             #
205             ########################### End of Exporting stuff ##########################
206              
207 3         14 use Spreadsheet::Edit::Log qw/log_call fmt_call log_methcall fmt_methcall oops/,
208 3     3   1594 ':btw=SE${lno}:' ;
  3         8  
209              
210 3     3   28 use Data::Dumper ();
  3         6  
  3         102  
211 3     3   26 use Data::Dumper::Interp 6.007 qw/:all/;
  3         67  
  3         56  
212              
213 3     3   78281 use Carp;
  3         8  
  3         317  
214             our @CARP_NOT = qw(Spreadsheet::Edit
215             Spreadsheet::Edit::IO
216             Tie::Indirect::Array Tie::Indirect::Hash
217             Tie::Indirect::Scalar
218             );
219              
220 3     3   25 use Scalar::Util qw(looks_like_number openhandle reftype refaddr blessed);
  3         5  
  3         191  
221 3     3   28 use List::Util qw(min max sum0 first any all pairs pairgrep);
  3         8  
  3         259  
222 3     3   852 use File::Temp qw(tempfile tempdir);
  3         20374  
  3         236  
223 3     3   24 use File::Basename qw(basename dirname fileparse);
  3         7  
  3         207  
224 3     3   24 use File::Find ();
  3         6  
  3         99  
225 3     3   17 use Symbol qw(gensym);
  3         7  
  3         159  
226 3     3   28 use POSIX qw(INT_MAX);
  3         10  
  3         43  
227 3     3   768 use Guard qw(scope_guard);
  3         572  
  3         248  
228              
229             require Tie::Indirect; # OUR CUSTOM STUFF: stop using it?
230              
231 3     3   2673 use Text::CSV 1.90_01; # 1st version with known_attributes()
  3         55981  
  3         243  
232 3         624 use Spreadsheet::Edit::IO qw(
233             OpenAsCsv @sane_CSV_read_options @sane_CSV_write_options
234             convert_spreadsheet
235 3     3   2181 sheetname_from_spec filepath_from_spec form_spec_with_sheetname);
  3         13  
236              
237 3   33     232 use constant _CALLER_OVERRIDE_CHECK_OK =>
238             (defined(&Carp::CALLER_OVERRIDE_CHECK_OK)
239 3     3   28 && &Carp::CALLER_OVERRIDE_CHECK_OK);
  3         11  
240              
241             # sub __mytraceback() { ... } archived in commit 482e09b6009
242              
243 3     3   21 use constant DEFAULT_WRITE_ENCODING => 'UTF-8';
  3         11  
  3         29546  
244             #use constant DEFAULT_READ_ENCODINGS => 'UTF-8,windows-1252';
245              
246             # This global is used by logmsg() to infer the current sheet if an apply is
247             # active, even if logmsg is called indirectly via another pkg
248             our $_inner_apply_sheet; # see &_apply_to_rows
249              
250             # The "current sheet", to which tied globals refer in any given package.
251             our %pkg2currsheet;
252              
253 24     24   41 sub __looks_like_aref($) { eval{ 1+scalar(@{$_[0]}) } } #actual or overloaded
  24         37  
  24         61  
254              
255             # Utility FUNCTIONS
256             #
257             sub to_array(@) { @_ != 1 ? @_ # 0 or multiple values
258 0         0 : ref($_[0]) eq "ARRAY" ? @{$_[0]}
259 4 0   4 0 24 : ref($_[0]) eq "HASH" ? @{ %{$_[0]} } # (key, value, ...)
  0 0       0  
  0 50       0  
260             : $_[0] # just 1 value
261             }
262 0     0 0 0 sub to_aref(@) { [ to_array(@_) ] }
263 0 0   0 0 0 sub to_wanted(@) { goto &to_array if wantarray; goto &to_aref }
  0         0  
264              
265             sub to_hash(@) {
266 4 50 33 4 0 24 @_==1 && ref($_[0]) eq "HASH" ? $_[0] :
    50          
267             (@_ % 2)!=0 ? croak("odd arg count, expecting key => value pairs") :
268             { to_array(@_) }
269             }
270              
271             sub cx2let(_) { # default arg is $_
272 52     52 0 77 my $cx = shift;
273 52         72 my $ABC="A"; ++$ABC for (1..$cx);
  52         109  
274 52         140 return $ABC
275             }
276             sub let2cx(_) {
277 0     0 0 0 my $ABC = shift;
278 0         0 my $n = ord(substr($ABC,0,1,"")) - ord('A');
279 0         0 while (length $ABC) {
280 0         0 my $letter = substr($ABC,0,1,"");
281 0         0 $n = (($n+1) * 26) + (ord($letter) - ord('A'));
282             }
283 0         0 return $n;
284             }
285              
286             # Produce the "automatic alias" identifier for an arbitrary title
287             sub title2ident($) {
288 32     32 0 45 local $_ = shift;
289 32         64 s/^\s+//; s/\s+$//; s/\W/_/g; s/^(?=\d)/_/;
  32         56  
  32         60  
  32         68  
290 32         126 $_
291             }
292              
293             # Format list as "word,word,..." without parens ; Non-barewords are "quoted".
294 0     0   0 sub __fmt_uqlist(@) { join(",",map{quotekey} @_) }
  0         0  
295 0     0   0 sub __fmt_uqarray(@) { "(" . &__fmt_uqlist . ")" }
296              
297             # Format list as without parens with barewords in qw/.../
298             sub __fmt_uqlistwithqw(@) {
299 0     0   0 my $barewords;
300 0         0 my $s = "";
301 0         0 foreach (map{quotekey} @_) {
  0         0  
302 0 0       0 if (/^\w/) {
303 0 0       0 if ($barewords++) {
304 0         0 $s .= " ";
305             } else {
306 0 0       0 $s .= ", " if $s;
307 0         0 $s .= "qw/";
308             }
309             } else {
310 0 0       0 if ($barewords) {
311 0         0 $s .= "/";
312 0         0 $barewords = 0;
313             }
314 0 0       0 $s .= ", " if $s;
315             }
316 0         0 $s .= $_;
317             }
318 0 0       0 $s .= "/" if $barewords;
319 0         0 $s
320             }
321 0     0   0 sub __fmt_uqarraywithqw(@) { "(" . &__fmt_uqlistwithqw . ")" }
322              
323             # Format list of pairs as "key1 => val1, key2 => val2, ..." without parens
324             sub __fmt_pairlist(@) {
325 0     0   0 my $result = "";
326 0         0 while (@_) {
327 0 0       0 confess "Odd arg count, expecting key => value pairs" if @_==1;
328 0 0       0 $result .= ", " if $result;
329 0         0 my $key = shift @_;
330 0         0 my $val = shift @_;
331 0         0 $result .= quotekey($key)." => ".vis($val);
332             }
333             $result
334 0         0 }
335             sub __fmt_pairs(@) {
336 0     0   0 __fmt_pairlist( map{ @$_ } @_ );
  0         0  
337             }
338              
339             # Concatenate strings separated by spaces, folding as necessary
340             # (strings are never broken; internal newlines go unnoticed).
341             # All lines (including the first) are indented the specified number of
342             # spaces. Explicit line-breaks may be included as "\n".
343             # A final newline is *not* included unless the last item ends with "\n".
344             sub __fill($;$$) {
345 0     0   0 my ($items, $indent, $foldwidth) = @_;
346 0   0     0 $indent //= 4;
347 0   0     0 $foldwidth //= 72;
348 0         0 my $buf = "";
349 0         0 my $llen = 0;
350 0         0 foreach (@$items) {
351 0 0 0     0 if ($_ eq "\n" or
      0        
352             ($llen > $indent && ($llen + length($_)) > $foldwidth)) {
353 0         0 $buf .= "\n";
354 0         0 $llen = 0;
355 0 0       0 next if $_ eq "\n";
356             }
357 0 0       0 if ($llen == 0) {
358 0         0 $buf .= (" " x $indent);
359 0         0 $llen = $indent;
360             } else {
361 0 0       0 if (substr($buf,-1) =~ /\S/) {
362 0         0 $buf .= " ";
363 0         0 ++$llen;
364             }
365             }
366 0         0 $buf .= $_;
367 0         0 $llen += length();
368             }
369 0         0 $buf;
370             }
371              
372             sub __fmt_colspec_cx($$) {
373             # "cx NN" or "COLSPEC [cx NN]" or "COLSPEC (NOT DEFINED)" if undef cx
374 0     0   0 my ($colspec, $cx) = @_;
375 0 0       0 if (ref($colspec) eq "Regexp") {
376 0         0 state $delimsets = [
377             [qw(/ /)], [qw({ })], [qw([ ])], [qw<( )>], [qw(< >)], [qw(« »)] ];
378 0         0 for (@$delimsets) {
379 0         0 my ($left, $right) = @$_;
380 0 0 0     0 if (index($colspec,$left)<0 && index($colspec,$right)<0) {
381 0         0 $colspec = "qr${left}${colspec}${right}";
382 0         0 last;
383             }
384             }
385             } else {
386 0         0 $colspec = visq($colspec);
387             }
388 0 0       0 return "$colspec (NOT DEFINED)"
389             if ! defined $cx;
390 0 0       0 $colspec eq "$cx" ? "cx $cx" : "$colspec [cx $cx]"
391             }
392             sub __fmt_cx($) {
393 28     28   43 my ($cx) = @_;
394 28 50       76 defined($cx) ? "cx $cx=".cx2let($cx) : "(undefined)"
395             }
396              
397             # Format %colx keys "intelligently". cx values are not shown for keys which are
398             # absolute column refs. Keys with undef values (from alias {optional => 1})
399             # are omitted since they are not currently valid. A final newline IS included.
400             sub _fmt_colx(;$$) {
401             my $self = shift;
402             my ($indent, $foldwidth) = @_;
403             my ($colx, $num_cols) = @$$self{qw{colx num_cols}};
404             # copy %$colx omitting keys with undef cx
405             my %hash = map{ defined($colx->{$_}) ? ($_ => $colx->{$_}) : () } keys %$colx;
406             my sub sortbycx(@) { sort { ($colx->{$a}//-1) <=> ($colx->{$b}//-1) } @_ }
407             my sub subset($) { # format items, deleting from %hash
408             my $specs = shift;
409             my (@items, $curr, $curr_desc);
410             my $curr_cx = -1;
411             my sub flush() {
412             return unless $curr_cx >= 0;
413             push @items, $curr.$curr_desc;
414             $curr = $curr_desc = undef; ##DEBUGGING
415             $curr_cx = -1;
416             }
417             my sub additem($$) {
418             (local $_, my $cx) = @_;
419             flush() if $curr_cx != $cx;
420             if ($curr_cx >= 0) {
421             $curr .= ",".quotekey($_);
422             } else {
423             $curr_cx = $cx;
424             $curr = quotekey($_);
425             my $misfit = (/^[A-Z]{1,2}$/ && $colx->{$_} != let2cx($_))
426             || (/^\d+$/ && $colx->{$_} != $_)
427             || (/^\D../) # show titles with cx too
428             ;
429             $curr_desc = $misfit ? "(cx ".vis($hash{$_}).")" : "";
430             }
431             }
432             foreach (@$specs) {
433             if (ref $_) {
434             push @items, $$_; # \"string" means insert "string" literally
435             } else {
436             # Work around old Perl lexical sub limitations...
437             my $item=$hash{$_};
438             oops(dvis '$_ $specs hash=',hvis(%hash)) unless defined $item;
439             additem($_, $item);
440             delete $hash{$_} // oops;
441             }
442             }
443             flush();
444             push @items, "\n" if @items; # wrap before next subset, or at end
445             @items
446             }
447             my @ABCs = subset [ map{ my $A = cx2let($_);
448             u($hash{$A}) eq $_ ? $A : \" "
449             } 0..$num_cols-1 ];
450              
451             # More lexical sub bug work-arounds...
452             my @ss1 = subset [sortbycx grep{ /^(=.*\D)\w+$/ } keys %hash]; # normal titles
453             my @ss2 = subset [sortbycx grep{ /^\d+$/ } keys %hash]; # numeric titles
454             my @ss3 = subset [sortbycx keys %hash]; # oddities
455             __fill [
456             @ABCs,
457             @ss1,
458             @ss2,
459             @ss3,
460             ], $indent, $foldwidth;
461             }
462              
463             # Is a title a special symbol or looks like a cx number?
464             sub __unindexed_title($$) {
465 14     14   26 my ($title, $num_cols) = @_;
466 14 50       35 oops unless defined $title;
467 14 50 33     134 $title eq ""
      33        
      33        
      33        
468             || $title eq '^'
469             || $title eq '$'
470             || ( ($title =~ /^[1-9]\d*$/ || $title eq "0") # not with leading zeros
471             && $title <= $num_cols )
472             }
473             sub _unindexed_title { #method for use by tests
474 0     0   0 my $self = shift;
475 0         0 __unindexed_title(shift(), $$self->{num_cols});
476             }
477              
478             # Return (normals, unindexed) where each is [title => cx, ...] sorted by cx
479             sub _get_usable_titles {
480 6     6   12 my $self = shift;
481 6         18 my ($rows, $title_rx, $num_cols) = @$$self{qw{rows title_rx num_cols}};
482 6   33     38 my $title_row = $rows->[$title_rx // oops];
483 6         21 my @unindexed;
484             my @normals;
485 6         0 my %seen;
486 6         17 for my $cx (0 .. $num_cols-1) {
487 14         39 my $title = $title_row->[$cx];
488 14 50       41 next if $title eq "";
489 14 50       45 if ($seen{$title}++) {
490 0 0       0 $self->_carponce("Warning: Non-unique title ", visq($title), " will not be usable for COLSPEC\n") unless $$self->{silent};
491 0         0 @normals = grep{ $_->[0] ne $title } @normals;
  0         0  
492 0         0 @unindexed = grep{ $_->[0] ne $title } @unindexed;
  0         0  
493 0         0 next;
494             }
495 14 50       32 if (__unindexed_title($title, $num_cols)) {
496 0         0 push @unindexed, [$title, $cx];
497             } else {
498 14         42 push @normals, [$title, $cx];
499             }
500             }
501 9         64 [sort { $a->[1] <=> $b->[1] } @normals],
502 6         29 [sort { $a->[1] <=> $b->[1] } @unindexed]
  0         0  
503             }
504              
505             # Non-OO api: Explicitly create a new sheet and make it the "current sheet".
506             # Options (e.g. to specify initial content) may be specified in an
507             # initial {OPTHASH} and/or as linear key => value pairs.
508             #
509             # Note: Most functions automatically create an empty sheet if no sheet
510             # exists, so this is only really needed when using more than one sheet
511             # or if you want to initialize a sheet from data in memory.
512             # N.B. the corresponding OO interface is Spreadsheet::Edit->new(...)
513             #
514             sub new_sheet(@) {
515 2     2 1 10449 my $opthash = &__opthash;
516 2         7 my %opts = (%$opthash, %{to_hash(@_)}); # new() merges these anyway
  2         8  
517              
518 2         6 my ($userpkg, $fn, $lno, $subname) = @{ __filter_frame(__usercall_info()) };
  2         6  
519 2 50       12 $userpkg = delete $opts{package} if exists $opts{package};
520              
521 2 50       8 my $pkgmsg = $opts{package} ? " [for pkg $userpkg]" : "";
522 2 50 33     16 croak "Invalid 'package' ",u($userpkg),"\n"
523             unless defined($userpkg) && $userpkg =~ /^[a-zA-Z][:\w]*$/a;
524              
525 2   33     17 $opts{data_source} ||= "Created at ${fn}:$lno by $subname";
526              
527 2         17 my $sheet = __silent_new(\%opts);
528              
529             log_call [$opthash, @_], [\(fmt_sheet($sheet), \$pkgmsg)]
530 2 50       16 if $$sheet->{verbose};
531              
532 2         13 $pkg2currsheet{$userpkg} = $sheet
533             }
534              
535             # logmsg() - Concatenate strings to form a "log message", possibly
536             # prefixed with a description of a "focus" sheet and optionally
537             # a specific row. A final \n is appended if needed.
538             #
539             # The "focus" sheet and row, if any, are determined as follows:
540             #
541             # If the first argument is a sheet object, [sheet_object],
542             # [sheet_object, rx], or [sheet_object, undef] then the indicated
543             # sheet and (optionally) row are used. Note that if called as a method
544             # the first arg will be the sheet object.
545             #
546             # Otherwise the first arg is not special and is included in the message.
547             #
548             # If no sheet is identified above, then the caller's package active
549             # sheet is used, if any.
550             #
551             # If still no sheet is identified, then the sheet of the innermost apply
552             # currently executing (anywhere up the stack) is used, if any; this sheet
553             # is internally saved in a global by the apply* methods.
554             #
555             # If a sheet is identified but no specific rx, then the
556             # "current row" of an active apply on that sheet is used, if any.
557             #
558             # If a focus sheet or sheet & row were identified, then the caller-supplied
559             # message is prefixed by "(<description>):" or "(row <num> <description>):"
560             # where <description> comes from:
561             #
562             # 1) If the sheet attribute {logmsg_pfx_gen} is defined to a subref,
563             # the sub is called and all returned items other than undef are
564             # concatenated (any undefs in the returned list are ignored); otherwise
565             #
566             # 2) The "sheetname" property is used, if defined; otherwise
567             #
568             # 3) the "data_source" property is used, which defaults to the name of the
569             # spreadsheet read by read_spreadsheet().
570             #
571             # FIXME: I should either rename logmsg_pfx_gen as logmsg_sheetdesc_gen
572             # to reflect that it only generated the sheet-description part,
573             # or else make prefix generators produce the entire message prefix
574             # including any row number.
575             #
576             sub _default_pfx_gen($$) {
577 274     274   561 my ($sheet, $rx) = @_;
578 274 50       1266 confess "bug" unless ref($sheet) =~ /^Spreadsheet::Edit\b/;
579 274 50       607 ($sheet->sheetname() || $sheet->data_source())
580             }
581             sub logmsg(@) {
582 285     285 1 283140 my ($sheet, $rx);
583 285 100 100     1297 if (@_ > 0 && ref($_[0])) {
584 208 100 33     802 if (ref($_[0]) =~ /^Spreadsheet::Edit\b/) {
    50 33        
585 24         53 $sheet = shift;
586             }
587             elsif (ref($_[0]) eq "ARRAY"
588 184         1104 && @{ $_[0] } <= 2
589             && ref($_[0]->[0])."" =~ /^Spreadsheet::Edit\b/) {
590 184         312 ($sheet, $rx) = @{ shift @_ };
  184         420  
591             }
592             }
593 285         608 my $msgstr = join("", grep{defined} @_);
  320         981  
594 285 100       666 if (! defined $sheet) {
595 77         206 $sheet = $pkg2currsheet{scalar(caller)};
596             }
597 285 100       568 if (! defined $sheet) {
598 22         36 $sheet = $Spreadsheet::Edit::_inner_apply_sheet;
599             }
600 285 100       555 if (! defined $rx) {
601 125 100       256 $rx = eval{ $sheet->rx() } if defined($sheet);
  114         313  
602             }
603 285         464 my @prefix;
604 285 100       515 if (defined $sheet) {
605 274         496 push @prefix, "(";
606 274 100 100     766 if (defined($rx) && ($rx < 0 || $rx > $#{$sheet->rows})) {
      100        
607 80         247 push @prefix, "Row ".($rx+1)."[INVALID RX $rx] ";
608 80         157 $rx = undef; # avoid confusing user's prefix generator
609             } else {
610 194 100       550 push @prefix, "Row ".($rx+1)." " if defined($rx);
611             }
612 274   50     725 my $pfxgen = $sheet->attributes->{logmsg_pfx_gen} // \&_default_pfx_gen;
613 274         613 push @prefix, &$pfxgen($sheet, $rx), "): ";
614             }
615 285 100       834 my $suffix = ($msgstr =~ /\n\z/s ? "" : "\n");
616             # If the message string starts with newlines, put them before the prefix
617 285 50       715 if ($msgstr =~ s/\A(\n+)([^\n])/$2/s) {
618 0         0 unshift @prefix, $1;
619             }
620 285         503 return join "", grep{defined} @prefix, $msgstr, $suffix;
  1593         4428  
621             }
622              
623             #####################################################################
624             # Locate the nearest call to a public sub in the call stack.
625             # [Now *GENERIC*]
626             #
627             # A "public" sub means anything named starting with [a-z], irrespective
628             # of package, and not mentioned in $USERCALL_NOT.
629             # The assumption is that internal subs are named with an initial
630             # underscore or are ALLCAPS (e.g. for constants).
631             #
632             # RETURNS
633             # ([frame], [called args]) in array context
634             # [frame] in scalar context
635             #
636             # "frame" means caller(n) results:
637             # 0 1 2 3
638             # package filename linenum subname ...
639             #
640             sub __usercall_info(;$) {
641 15     15   39 for (my $lvl=1 ; ; ++$lvl) {
642 53         118 @DB::args = \$lvl if _CALLER_OVERRIDE_CHECK_OK; # see mytraceback()
643 53         77 my @frame = do{ package
644 53         568 DB; caller($lvl) };
645 53 50       139 oops dvis('$lvl @frame') unless defined($frame[0]);
646 53 100       307 if ( $frame[3] =~ /::([a-z][^:]*)/ ) {
647 15 100       159 return \@frame unless wantarray;
648 2         4 my @args;
649 2         5 my $hasargs = $frame[4];
650 2 50       5 if ($hasargs) {
651 2         6 eval{ @args = @DB::args };
  2         5  
652 2 50       6 @args=("<perl bug(?) prevented getting args>") if $@;
653             }
654 2         8 return (\@frame, \@args)
655             }
656             }
657             }
658              
659             sub __filter_frame($) { #clean-up/abbreviate for display purposes
660 5     5   11 my @frame = @{shift @_};
  5         20  
661 5         186 $frame[1] = basename $frame[1]; # filename
662 5         37 $frame[3] =~ s/.*:://; # subname
663             \@frame
664 5         44 }
665             sub __fn_ln_methname() {
666 3     3   7 @{ __filter_frame(__usercall_info()) }[1,2,3]; # (fn, lno, subname)
  3         9  
667             }
668              
669             sub __methname() {
670 0     0   0 (&__fn_ln_methname())[2]
671             }
672              
673             sub __find_userpkg() {
674 10     10   29 ${ __usercall_info() }[0];
  10         41  
675             }
676              
677             # This always returns the caller's caller's package but also
678             # checks that it is not an internal call, which should never happen
679             sub __callerpkg() {
680 1     1   20 my $pkg = (caller(1))[0];
681 1 50       7 oops if index($pkg,__PACKAGE__) == 0;
682 1         4 $pkg
683             }
684              
685             # Create a new object without allowing any logging. This is used when
686             # new() is called implicitly by something else and new's log messages
687             # might display an internal filename (namely Edit.pm).
688             #
689             # debug/verbose args are removed from the arguments passed to new()
690             # and put back into the object after it is created.
691             sub __silent_new(@) {
692 2     2   4 my $opthash = &__opthash;
693 2         8 my $new_args = to_hash(@_);
694              
695 2         4 my %saved;
696 2         6 foreach my $key (qw/verbose debug/) {
697 4 50       14 $saved{$key} = $opthash->{$key} if exists($opthash->{$key});
698 4         9 $opthash->{$key} = 0; # force off
699 4 50       12 $saved{$key} = delete($new_args->{$key}) if exists($new_args->{$key});
700             }
701              
702 2         20 my $self = Spreadsheet::Edit->new($opthash, %$new_args);
703              
704 2         12 delete @$$self{qw/verbose debug/};
705 2         7 $self->_set_verbose_debug_silent(%saved);
706              
707 2         7 $self
708             }
709              
710             #####################################################################
711             # Get "self" for a function/method combo sub:
712             # If the first arg is an object ref we shift it off and use that
713             # (i.e. assume it is called as a method); otherwise we assume it's a
714             # functional-API function call and use the caller's "current sheet"
715             # (if none exists, __self creates one but __selfmust throws).
716             #
717             # This must be used with special syntax like
718             # my $self = &__self;
719             # which re-uses @_ so we can shift @_ as seen by our caller.
720              
721             sub __self_ifexists {
722              
723             # If the first arg is an object ref, shift it off and return it;
724             # Otherwise, if the caller's "current sheet" exists, return that;
725             # otherwise return undef.
726              
727             (defined(blessed($_[0])) && $_[0]->isa(__PACKAGE__) && shift(@_))
728 1120 100 66 1120   8668 || $pkg2currsheet{__find_userpkg()};
      66        
729             }
730             sub __selfmust { # sheet must exist, otherwise throw
731 841 50   841   1293 &__self_ifexists || do{
732 0         0 my $pkg = caller(1);
733 0         0 croak __methname()," : No sheet is defined in $pkg\n"
734             };
735             }
736              
737             sub __self { # a new empty sheet is created if necessary
738 279 50   279   487 &__self_ifexists || do{
739             # Create a new empty sheet and make it the caller's "current sheet".
740 0         0 my %opts;
741 0         0 my ($frame, $args) = __usercall_info();
742              
743 0         0 my ($userpkg, $fn, $lno, $subname) = @{ __filter_frame($frame) };
  0         0  
744 0         0 $opts{data_source} = "(Created implicitly by '$subname' at ${fn}:$lno)";
745            
746 0         0 my $self = $pkg2currsheet{$userpkg} = __silent_new(\%opts);
747 0         0 $self
748             }
749             }
750              
751              
752             ## Helpers...
753              
754             sub __opthash {
755 35 100   35   130 ref($_[0]) eq "HASH" ? shift(@_) : {}
756             }
757             sub __selfmust_opthash {
758 8     8   24 my $self = &__selfmust;
759 8         40 my $opthash = &__opthash;
760 8         31 ($self, $opthash)
761             }
762             sub __self_opthash {
763 5     5   16 my $self = &__self;
764 5         23 my $opthash = &__opthash;
765 5         19 ($self, $opthash)
766             }
767             sub __selfonly {
768 274     274   501 my $self = &__self;
769 274 50       634 confess __methname, " expects no arguments!\n" if @_;
770 274         1176 $self
771             }
772             sub __selfmustonly {
773 553     553   852 my $self = &__selfmust;
774 553 50       1260 confess __methname, " expects no arguments!\n" if @_;
775 553         2127 $self
776             }
777              
778             sub __self_opthash_Nargs($@) { # (num_expected_args, @_)
779 2     2   5 my $Nargs = shift;
780 2         6 my ($self, $opthash) = &__self_opthash;
781             #croak
782 2 50       7 croak __methname, " expects $Nargs arguments, not ",scalar(@_),"\n"
783             if $Nargs != @_;
784 2         7 ($self, $opthash, @_)
785             }
786 0     0   0 sub __self_opthash_0args { unshift @_,0; goto &__self_opthash_Nargs }
  0         0  
787 2     2   7 sub __self_opthash_1arg { unshift @_,1; goto &__self_opthash_Nargs }
  2         6  
788 0     0   0 sub __self_opthash_2args { unshift @_,2; goto &__self_opthash_Nargs }
  0         0  
789 0     0   0 sub __self_opthash_3args { unshift @_,3; goto &__self_opthash_Nargs }
  0         0  
790              
791             # Check that an option hash has only valid keys, and values aren't undef
792             sub __validate_opthash($$;@) {
793 10     10   55 my ($opthash, $valid_keys, %opts) = @_;
794 10 50       27 return unless defined $opthash; # silently accept undef
795 10         36 foreach my $k (keys %$opthash) {
796             croak "Unrecognized ",($opts{desc}//"option")," '$k'"
797 1 50 0 1   11 unless first{$_ eq $k} @$valid_keys;
  1         5  
798             confess "Option '$k' must be defined"
799             if $opts{undef_ok} && !defined($opthash->{$k})
800 1 50 33     12 && !grep{$_ eq $k} @{$opts{undef_ok}};
  0   33     0  
  0         0  
801             }
802             $opthash
803 10         25 }
804              
805             # Copy verbose/debug/silent options into $self, deleting them from
806             # the provided options hash.
807             # RETURNS: Hash of original values to pass to _restore_stdopts()
808             #
809             # This is used by methods which accept {verbose} etc. options
810             # which override what is in the object for the duration of that method call.
811             sub _set_stdopts {
812 10     10   27 my ($self, $opthash) = @_;
813 10         21 my $previous = {};
814 10         31 foreach my $key (qw/verbose debug silent/) {
815 30 100       78 if (exists $opthash->{$key}) {
816 2         6 $previous->{$key} = $$self->{$key};
817 2         7 $$self->{$key} = delete($opthash->{$key});
818             }
819             }
820             $previous
821 10         27 }
822             sub _restore_stdopts {
823 10     10   17 my $self = shift;
824 10         18 my $saved = shift;
825 10         85 @$$self{keys %$saved} = values %$saved;
826             }
827              
828             sub _validate_ident($) {
829 0 0   0   0 croak "identifier is undef!" unless defined $_[0];
830 0 0       0 croak "identifier is empty" unless $_[0] ne "";
831 0 0       0 croak ivisq '"$_[0]" is not a valid identifier\n'
832             unless $_[0] eq title2ident($_[0]);
833 0         0 $_[0]
834             }
835              
836             # Check that an option hash has only valid keys
837             sub __validate_4pthash($$;$) {
838 0     0   0 my ($opthash, $valid_keys, $optdesc) = @_;
839 0 0       0 return unless defined $opthash; # silently accept undef
840 0         0 foreach my $k (keys %$opthash) {
841             croak "Unrecognized ",($optdesc//"option")," '$k'"
842 0 0 0 0   0 unless first{$_ eq $k} @$valid_keys;
  0         0  
843             }
844             $opthash
845 0         0 }
846              
847             sub __validate_nonnegi($;$) {
848 4 50 0 4   27 croak(($_[1]//"argument")." must be a non-negative integer",
      33        
849             " (not ".u($_[0]).")")
850             unless defined($_[0]) && "$_[0]" =~ /^\d+$/;
851 4         10 $_[0]
852             }
853             sub __validate_nonnegi_or_undef($;$) {
854 0 0 0 0   0 croak(($_[1]//"argument")." must be a non-negative integer or undef",
      0        
855             " (not ".u($_[0]).")")
856             unless !defined($_[0]) || "$_[0]" =~ /^\d+$/;
857 0         0 $_[0]
858             }
859              
860             sub __validate_pairs(@) {
861 4 50   4   29 unless ((scalar(@_) % 2) == 0) {
862 0 0       0 croak __methname," does not accept an {OPTIONS} hash here"
863             if (ref($_[0]) eq "HASH");
864 0         0 confess "In call to ",__methname,
865             " : uneven arg count, expecting key => value pairs"
866             }
867 4         46 foreach (pairs @_) {
868 1         4 my $key = $_->[0];
869 1 50       6 confess "In call to ",__methname," the key '$key' looks suspicious"
870             unless $key =~ /^\w+$/;
871             }
872             @_
873 4         24 }
874              
875             sub _check_rx {
876 0     0   0 my ($self, $rx, $one_past_end_ok) = @_;
877 0 0 0     0 confess __methname.": Illegal rx ",vis($rx),"\n"
878             unless ($rx//"") =~ /^\d+$/; # non-negative integer
879 0         0 my $maxrx = $#{$$self->{rows}};
  0         0  
880 0 0       0 confess __methname.": rx ".vis($rx)." is beyond the last row\n"
    0          
881             .dvis(' $$self')
882             if $rx > ($one_past_end_ok ? ($maxrx+1) : $maxrx);
883             }
884              
885             # Diagnose scalar context if there are no results.
886             sub __first_ifnot_wantarray(@) {
887 0     0   0 my $wantarray = (caller(1))[5];
888 0 0       0 return @_ if $wantarray;
889 0 0       0 return $_[0] if @_;
890 0 0       0 croak __methname, " called in scalar context but that method does not return a result.\n"
891             if defined($wantarray);
892             }
893             sub __validate_not_scalar_context(@) {
894 0     0   0 my $wantarray = (caller(1))[5];
895 0 0 0     0 croak __methname, " returns an array, not a scalar"
896             unless $wantarray || !defined($wantarray);
897             @_
898 0         0 }
899              
900             sub _carponce { # if not silent
901 0     0   0 my $self = shift;
902 0         0 my $msg = join "",@_;
903 0 0       0 return if $$self->{_carponce}->{$msg}++;
904 0 0       0 $msg .= "\n" unless $msg =~ /\n\z/s;
905             carp($msg)
906 0 0       0 unless $$self->{silent}; # never appears even if silent is later unset
907             }
908              
909             ###################### METHODS/FUNCTIONS #######################
910              
911             # Unlike other methods, new() takes key => value pair arguments.
912             # For consistency with other methods an initial {OPTIONS} hash is
913             # also allowed, but it is not special in any way and is merged
914             # with any linear args (linear args override {OPTIONS}).
915              
916             sub new { # Strictly OO, this does not affect caller's "current sheet".
917             # The corresponding functional API is new_sheet() which explicitly
918             # creates a new sheet and makes it the 'current sheet'.
919 4     4 1 9 my $classname = shift;
920 4 50 33     33 croak "Invalid/missing CLASSNAME (i.e. \"this\") arg"
921             unless defined($classname) && $classname =~ /^[\w_:]+$/;
922              
923 4         17 my $opthash = &__opthash;
924             # Special handling of {cmd_nesting) since there was no object to begin with:
925             # Internal callers may pass this as a "user" option in {OPTARGS};
926             # we won't log it, but we plant it into the object below.
927             # **THE CALLER MUST DECREMENT IT LATER IF NEEDED*
928 4   50     23 my $cmd_nesting = delete($opthash->{cmd_nesting}) // 0;
929              
930 4         22 my %opts = (verbose => $Verbose, debug => $Debug, silent => $Silent,
931             %$opthash,
932             __validate_pairs(@_));
933            
934 4         9 my $self;
935 4 50       59 if (my $clonee = delete $opts{clone}) { # untested as of 2/12/14
936 0         0 delete @opts{qw/verbose debug silent/};
937 0 0       0 croak "Other options not allowed with 'clone': ",hvis(%opts) if %opts;
938 0         0 require Clone;
939 0         0 $self = Clone::clone($clonee); # in all its glory
940             $$self->{data_source} = (delete $opts{data_source})
941 0   0     0 // "cloned from $$self->{data_source}";
942             } else {
943             my $hash = {
944             attributes => delete $opts{attributes} // {},
945             linenums => delete $opts{linenums} // [],
946             meta_info => delete $opts{meta_info} // [], ##### ???? obsolete ???
947             data_source => delete $opts{data_source}, # // "(none)",
948             num_cols => delete $opts{num_cols}, # possibly undef
949              
950             # %colx maps titles, aliases (automatic and user-defined), and
951             # spreadsheet column lettercodes to the corresponding column indicies.
952 4   50     113 colx => {},
      50        
      50        
953             colx_desc => {}, # for use in error messages
954             useraliases => {}, # key exists for user-defined alias names
955              
956             title_rx => undef,
957             first_data_rx => undef,
958             last_data_rx => undef,
959             current_rx => undef, # valid during apply()
960              
961             pkg2tiedvarnames => {},
962             pkg2tieall => {},
963             };
964              
965             # We can not use $hash directly as the object representation because %{}
966             # is overloaded, so we use a scalar ref (pointing to the hashref)
967             # as the object.
968 4         17 $self = bless \$hash, $classname;
969              
970             # Create a tied virtual array which creates Magicrows when assigned to.
971 4         7 my @rows; tie @rows, 'Spreadsheet::Edit::RowsTie', $self;
  4         34  
972 4         18 $hash->{rows} = \@rows;
973              
974 4 100       15 if (my $newdata = delete $opts{rows}) {
975 2         6 foreach (@$newdata) {
976 6         30 push @rows, $_;
977             }
978             }
979             }# not cloning
980              
981 4         70 $$self->{cmd_nesting} = $cmd_nesting;
982              
983 4         27 $self->_set_verbose_debug_silent(%opts); # croaks if other keys remain
984              
985             # Validate data, default num_cols, pad rows, etc.
986 4         53 $self->_rows_replaced();
987              
988             log_call [$opthash,@_], [\fmt_sheet($self)]
989 4 50       14 if $$self->{verbose};
990              
991 4         23 $self
992             }#new
993              
994             use overload
995             # As an ARRAYref, a sheet acts like \@rows which is (a ref to) a
996             # virtual array of Magicrow objects, each of which is a dual array/hash
997             # ref to cells in a given row (via RowsTie).
998 0     0   0 '@{}' => sub { my $hash = ${ shift() }; $hash->{rows}; },
  0         0  
  0         0  
999              
1000             # As a HASHref, a sheet acts like \%crow which is (a ref to)
1001             # the hash view of the current row during 'apply'
1002 0     0   0 '%{}' => sub { my $self = shift;
1003             # probably less efficient but avoids repeating code
1004 0         0 \%{ $self->crow() };
  0         0  
1005             },
1006             #'""' => sub { shift },
1007             #'0+' => sub { shift },
1008             #'==' => sub { my ($self, $other, $swap) = @_; $self == $other },
1009             #'eq' => sub { my ($self, $other, $swap) = @_; "$self" eq "$other" },
1010 3         30 fallback => 1,
1011 3     3   34 ;
  3         6  
1012              
1013             sub _rows_replaced { # completely new or replaced rows, linenums, etc.
1014 6     6   20 my ($self) = @_;
1015 6         14 my $hash = $$self;
1016              
1017             my ($rows, $linenums, $num_cols, $current_rx)
1018 6         43 = @$hash{qw/rows linenums num_cols current_rx/};
1019              
1020 6 50       21 croak "Can not replace sheet content during an apply!\n"
1021             if defined $current_rx;
1022 6         22 for my $rx (0..$#$rows) {
1023 12         45 my $row = $rows->[$rx];
1024 12 50       29 croak "rows must contain refs to arrays of cells (row $rx is $row)"
1025             unless __looks_like_aref($row);
1026 12         26 for my $cx (0..$#$row) {
1027 30 50       54 croak "New cell at Row ",$rx+1," Column ",cx2let($cx)," contains a ref"
1028             if ref($row->[$cx]);
1029             }
1030             }
1031             croak '\'linenums\' if present must be (a ref to) an array of numbers, ',
1032             ' "" or "???"', ivis('\nnot $linenums\n')
1033             unless ref($linenums) eq "ARRAY"
1034 6 50 33 6   74 && all{ defined() and looks_like_number($_) || !/[^\?]/ } @$linenums;
  6 50 33     25  
1035              
1036 6 100       31 if (@$rows) { # Determine num_cols and pad short rows
1037 4         8 my $nc = 0;
1038 4 100       13 foreach (@$rows) { $nc = @$_ if @$_ > $nc }
  12         24  
1039 4 50 33     15 if ($num_cols && $num_cols != $nc) {
1040 0         0 croak "num_cols=$num_cols was specified along with initial data, but\n",
1041             "the value doesn't match the data (which has up to $nc columns)\n"
1042             } else {
1043 4         23 $hash->{num_cols} = $num_cols = $nc;
1044             }
1045             # Pad short rows with empty fields
1046 4         15 foreach my $row (@$rows) {
1047 12         26 push @$row, (("") x ($num_cols - @$row));
1048             }
1049 4         10 $#$linenums = $#$rows;
1050 4   100     11 foreach (@$linenums) { $_ //= '???' };
  12         43  
1051             } else {
1052             # There is no data. Default num_cols to zero, but leave any
1053             # user-supplied value so a subsequent insert_rows() will know how
1054             # many columns to create.
1055 2   50     11 $hash->{num_cols} //= 0;
1056             }
1057             #oops unless $hash->{data_source};
1058 6 50       16 croak "#linenums ($#$linenums) != #rows ($#$rows)\n",
1059             dvis '$hash->{linenums}\n$hash->{rows}'
1060             unless @$linenums == @$rows;
1061              
1062 6         11 $hash->{title_rx} = undef;
1063 6         13 $hash->{first_data_rx} = undef;
1064 6         10 $hash->{last_data_rx} = undef;
1065 6         16 $hash->{useraliases} = {};
1066 6         46 $self->_rebuild_colx; # Set up colx colx_desc
1067 6         12 $self
1068             }#_rows_replaced
1069              
1070             #########################################################
1071             # Combination FUNCTION/METHOD
1072             # These are declared with signatures for use as functional-API FUNCTIONs
1073             # which use the caller's "current sheet" as the implicit object.
1074             #
1075             # However they may also be called as METHODs with an explicit object.
1076             #########################################################
1077              
1078             # Allow user to find out names of tied variables
1079             sub tied_varnames(;@) {
1080 0     0 0 0 my ($self, $opts) = &__selfmust_opthash;
1081 0   0     0 my $pkg = $opts->{package} // __callerpkg();
1082 0   0     0 my $h = $$self->{pkg2tiedvarnames}->{$pkg} //= {};
1083 0         0 return keys %$h;
1084             }
1085              
1086             # Internal: Tie specified variables into a package if not already tied.
1087             # Returns:
1088             sub __TCV_REDUNDANT() { 1 } # if all were already tied
1089             sub __TCV_OK() { 2 } # otherwise (some tied, or no idents specified)
1090             #
1091             sub _tie_col_vars {
1092 3     3   6 my $self = shift;
1093 3         7 my $pkg = shift;
1094 3         5 my $parms = shift;
1095             # Remaining arguments are idents
1096              
1097 3         23 my ($safe, $file, $lno) = @$parms;
1098 3 100       17 my @safecheck_pkgs = $pkg eq "main" ? ($pkg) : ($pkg, "main");
1099              
1100             my ($colx, $colx_desc, $debug, $silent)
1101 3         26 = @$$self{qw/colx colx_desc debug silent/};
1102              
1103             # FIXME: BUG/ISSUE ...
1104             # Why is it correct to keep tiedvarnames PER-SHEET ?
1105             # Isn't this a global property of each package?
1106              
1107 3   50     25 my $tiedvarnames = ($$self->{pkg2tiedvarnames}->{$pkg} //= {});
1108              
1109 3 50 33     27 if (@_ > 0 && %$tiedvarnames) {
1110             SHORTCUT: {
1111 0         0 foreach (@_) {
  0         0  
1112 0 0       0 last SHORTCUT unless exists $tiedvarnames->{$_};
1113             }
1114 0         0 return __TCV_REDUNDANT;
1115             }
1116             }
1117              
1118             VAR:
1119 3         12 foreach (sort {$a->[0] <=> $b->[0]} # sort for ease of debugging
  26         51  
1120             map {
1121 16         31 my $cx = $colx->{$_};
1122             defined($cx)
1123 16 50       67 ? [ $cx, $_, $colx_desc->{$_} ]
1124             : [ 999999, $_, "(currently NOT DEFINED)" ];
1125             } @_
1126             )
1127             {
1128 16         162 my ($cx, $ident, $desc) = @$_;
1129 16 50       71 oops unless $ident =~ /^\w+$/;
1130              
1131 16 50       35 if (exists $tiedvarnames->{$ident}) {
1132 0 0       0 $self->_log(" Previously tied: \$${pkg}::${ident}\n") if $debug;
1133             next
1134 0         0 }
1135              
1136 16 50       28 btw dvis '##ZZ $ident $safe ${^GLOBAL_PHASE} @safecheck_pkgs\n' if $debug;
1137              
1138 3     3   4713 no strict 'refs';
  3         8  
  3         526  
1139 16 100       35 if ($safe) {
1140 8 50       21 if (${^GLOBAL_PHASE} ne "START") {
1141 0 0       0 $self->_carponce("Not tieing new variables because :safe was used and this is not (any longer) during compile time\n") unless $silent;
1142 0         0 return __TCV_REDUNDANT; ### IMMEDIATE EXIT ###
1143             }
1144 8         12 foreach my $p (@safecheck_pkgs) {
1145             # Per 'man perlref' we can not use *foo{SCALAR} to detect a never-
1146             # declared SCALAR (it's indistinguishable from an existing undef var).
1147             # So we must insist that the entire glob does not exist.
1148 16 50       29 btw ivis ' Checking pkg $p ident $ident ...\n' if $debug;
1149 3     3   25 no strict 'refs';
  3         27  
  3         511  
1150 16 50       23 if (exists ${$p.'::'}{$ident}) {
  16         71  
1151 0         0 my $msg = <<EOF ;
1152             COLSPEC '$ident' clashes with an existing object in package '$p' .
1153             This is dis-allowed when tie_column_vars was called with option :safe,
1154             in this case at ${file}:${lno} . In this situation you can not
1155             explicitly declare the tied variables, and they must be tied and
1156             imported before the compiler sees them.
1157              
1158             Note: The clash might not be with a scalar \$$ident, but something else
1159             named '${ident}' in the same package (array, hash, sub, filehandle,
1160             etc.) Unfortunately it is not possible to distinguish a non-existent
1161             scalar from a declared scalar containing an undef value
1162             (see *foo{SCALAR} in 'man perlref'). Therefore, to be safe,
1163             nothing is allowed to pre-exist with the name '${ident}'.
1164             EOF
1165             # We can detect anything other than an undef scalar
1166 0         0 local $Data::Dumper::Maxdepth = 2;
1167 0         0 local $Data::Dumper::Interp::Maxdepth = 2;
1168 0         0 my $fqname = $p."::".$ident;
1169 3     3   21 no strict 'refs';
  3         13  
  3         35656  
1170 0 0       0 if (my $r = *$fqname{SCALAR}) {
1171 0 0       0 if (defined($$r)) {
1172 0         0 $msg .= "\nExisting *Defined* Scalar \$$ident = ".vis($$r)."\n";
1173             }
1174             }
1175 0 0       0 if (my $r = *$fqname{ARRAY}) {
1176 0         0 $msg .= "\nExisting Array \@$fqname = ".avis(@$r)."\n";
1177             }
1178 0 0       0 if (my $r = *$fqname{HASH}) {
1179 0         0 $msg .= "\nExisting Hash \%$fqname = ".hvis(%$r)."\n";
1180             }
1181 0 0       0 if (my $r = *$fqname{CODE}) {
1182 0         0 $msg .= "\nExisting $fqname = ".Data::Dumper->new([$r])->Terse(1)->Indent(0)->Deparse(1)->Dump()."\n";
1183             }
1184 0 0       0 if (my $r = *$fqname{IO}) {
1185 0         0 $msg .= "\nExisting IO object $fqname (file/dir handle, etc.)\n";
1186             }
1187 0 0       0 if (eval{ require Devel::Symdump; }) {
  0         0  
1188 0         0 my $obj = Devel::Symdump->new($p);
1189 0         0 foreach (qw/FILEHANDLE.s DIRHANDLE.s UNKNOWN.s PACKAGE.s/) {
1190 0 0       0 /^(.+)\.(.*)$/ or die; my $kind = lc $1; my $methname = lc $1.$2;
  0         0  
  0         0  
1191 0 0       0 if (grep{$_ eq $ident} $obj->$methname()) {
  0         0  
1192 0         0 $msg .= "\n Found existing $kind object $fqname\n";
1193             }
1194             }
1195             }
1196 0         0 croak "\n$msg\n";
1197             }
1198             }
1199             }
1200              
1201 16 50       29 $self->_log("tie \$${pkg}::${ident} to $desc\n") if $debug;
1202              
1203 16         33 $tiedvarnames->{$ident} = 1;
1204              
1205 16         30 *{"$pkg\::$ident"} = \${ *{gensym()} };
  16         245  
  16         20  
  16         106  
1206              
1207 16         33 tie ${"${pkg}::$ident"}, 'Tie::Indirect::Scalar',
  16         78  
1208             \&_tiecell_helper, $pkg, $ident;
1209             }
1210 3         35 return __TCV_OK;
1211             }
1212             sub _tiecell_helper {
1213 2     2   116 my($mutating, $pkg, $ident) = @_;
1214 2   33     19 my $sheet = $pkg2currsheet{$pkg}
1215             // croak "No sheet is currently valid for package $pkg\n";
1216 2         19 $sheet->_onlyinapply("tied variable \$$ident");
1217              
1218             # WRONG... it croaks bc sheet->{rows}->[rx] is a rowhash which doesn't like undef keys
1219             # WRONG: This returns \undef if $ident is not currently valid
1220 2         12 \( $$sheet->{rows}->[$$sheet->{current_rx}]->{$ident} )
1221             }
1222              
1223             sub _all_valid_idents {
1224 3     3   10 my $self = shift;
1225 3         6 my %valid_idents;
1226 3         7 foreach (keys %{ $$self->{colx} }) {
  3         17  
1227 18 50       55 if (/^(?:REGERROR|REGMARK|AUTOLOAD)$/) {
1228 0         0 $self->_carponce("WARNING: Column key ",visq($_)," conflicts with a Perl built-in; variable will not be tied.\n");
1229 0         0 next;
1230             }
1231 18         34 $valid_idents{ title2ident($_) } = 1;
1232             }
1233 3         24 return keys %valid_idents;
1234             }
1235              
1236             sub tie_column_vars(;@) {
1237 3     3 1 16 my ($self, $opts) = &__self_opthash;
1238             # Any remaining args specify variable names matching
1239             # alias names, either user-defined or automatic.
1240              
1241 3 50       11 croak "tie_column_vars without arguments (did you intend to use ':all'?)"
1242             unless @_;
1243              
1244 3   33     28 local $$self->{silent} = $opts->{silent} // $$self->{silent};
1245 3   33     32 local $$self->{verbose} = $opts->{verbose} // $$self->{verbose};
1246 3   33     19 local $$self->{debug} = $opts->{debug} // $$self->{debug};
1247              
1248 3   66     16 my $pkg = $opts->{package} // __callerpkg();
1249              
1250 3         6 my (%tokens, @varnames);
1251 3 50       9 foreach (@_) { if (/:/) { $tokens{$_} = 1 } else { push @varnames, $_ } }
  5         22  
  5         20  
  0         0  
1252 3         11 foreach (@varnames) {
1253 0 0       0 croak "Invalid variable name '$_'\n" unless /^\$?\w+$/;
1254 0         0 s/^\$//;
1255             }
1256              
1257             # With ':all' tie all possible variables, now and in the future.
1258             #
1259             # CURRENTLY UNDOCUMENTED: With the ":safe" token, a check is made
1260             # that variables do not already exist immediately before tying them;
1261             # otherwise an exception is thrown.
1262             #
1263             # When ':safe' is combined with ':all', variables will not be checked & tied
1264             # except during compile time, i.e. within BEGIN{...}. Therefore a
1265             # malicious spreadsheet can not cause an exception after the compilation
1266             # phase.
1267 3         6 my $safe = delete $tokens{':safe'};
1268 3         11 my ($file, $lno) = __fn_ln_methname();
1269 3         12 my $parms = [$safe, $file, $lno];
1270              
1271             # Why? Obsolete? Only for :all?? [note added Dec22]
1272             # FIXME TODO: is this bogus?
1273 3 50       14 $self->title_rx($opts->{title_rx}) if exists $opts->{title_rx};
1274              
1275 3 50       13 if (delete $tokens{':all'}) {
1276             # Remember parameters for tie operations which might occur later
1277 3         14 $$self->{pkg2tieall}->{$pkg} = $parms;
1278              
1279 3         32 push @varnames, sort $self->_all_valid_idents;
1280             }
1281 3 50       10 croak "Unrecognized token in arguments: ",avis(keys %tokens) if %tokens;
1282              
1283 3         16 my $r = $self->_tie_col_vars($pkg, $parms, @varnames);
1284              
1285 3 50       12 my $pfx = ($r == __TCV_REDUNDANT ? "[ALL REDUNDANT] " : "");
1286             log_methcall $self, [\$pfx, \__fmt_uqarraywithqw(keys %tokens, @varnames),
1287             \" in package $pkg"]
1288 3 50       22 if $$self->{verbose};
1289             }#tie_column_vars
1290              
1291             #
1292             # Accessors for misc. sheet data
1293             #
1294              
1295 274     274 1 387 sub attributes(@) { ${&__selfonly}->{attributes} }
  274         498  
1296 0     0 1 0 sub colx() { ${&__selfmustonly}->{colx} }
  0         0  
1297 0     0 1 0 sub colx_desc() { ${&__selfmustonly}->{colx_desc} }
  0         0  
1298             sub data_source(;$) {
1299 274     274 1 459 my $self = &__selfmust;
1300 274 50       659 if (@_ == 0) { # 'get' request
1301 274 50       654 log_methcall $self, [], [$$self->{data_source}] if $$self->{verbose};
1302             return $$self->{data_source}
1303 274         1053 }
1304 0 0       0 log_methcall $self, [@_] if $$self->{verbose};
1305 0 0       0 croak "Too many args" unless @_ == 1;
1306 0         0 $$self->{data_source} = $_[0];
1307 0         0 $self
1308             }
1309 0     0 1 0 sub linenums() { ${&__selfmustonly}->{linenums} }
  0         0  
1310 0     0 1 0 sub num_cols() { ${&__selfmustonly}->{num_cols} }
  0         0  
1311 161     161 1 238 sub rows() { ${&__selfmustonly}->{rows} }
  161         288  
1312 274     274 1 359 sub sheetname() { ${&__selfmustonly}->{sheetname} }
  274         511  
1313              
1314 0     0 0 0 sub iolayers() { ${&__selfmustonly}->{iolayers} }
  0         0  
1315 0     0 0 0 sub meta_info() {${&__selfmustonly}->{meta_info} }
  0         0  
1316             sub input_encoding() {
1317             # Emulate old API. We actually store input_iolayers instead now,
1318             # so as to include :crlf if necessary.
1319 0     0 0 0 my $self = &__selfmustonly;
1320 0         0 local $_;
1321             return undef unless
1322 0         0 exists(${$self}->{input_iolayers})
1323 0 0 0     0 && ${$self}->{input_iolayers} =~ /encoding\(([^()]*)\)/;
  0         0  
1324 0         0 return $1;
1325             }
1326              
1327             # See below for title_rx()
1328             sub title_row() {
1329 0     0 1 0 my $self = &__selfmustonly;
1330 0         0 my $title_rx = $$self->{title_rx};
1331 0 0       0 my $r = defined($title_rx) ? $$self->{rows}->[$title_rx] : undef;
1332 0 0       0 log_methcall $self, [], [$r] if $$self->{verbose};
1333 0         0 $r
1334             }
1335 118     118 1 234 sub rx() { ${ &__selfmustonly }->{current_rx} }
  118         246  
1336             sub crow() {
1337 0     0 1 0 my $self = &__selfmustonly;
1338 0         0 ${ $self->_onlyinapply("crow() method") }->{rows}->[$$self->{current_rx}]
  0         0  
1339             }
1340             sub linenum() {
1341 0     0 1 0 my $self = &__selfmustonly;
1342 0   0     0 my $current_rx = $$self->{current_rx} // return(undef);
1343 0         0 $$self->{linenums}->[$current_rx];
1344             }
1345             sub _getref {
1346 0     0   0 my ($self, $rx, $ident) = @_;
1347 0         0 my ($rows, $colx) = @$$self{qw/rows colx/};
1348 0 0 0     0 croak "get/set: rx $rx is out of range" if $rx < 0 || $rx > $#$rows;
1349 0         0 my $row = $$self->{rows}->[$rx];
1350 0         0 my $cx = $colx->{$ident};
1351 0 0 0     0 oops("Invalid cx ".vis($cx)) if ! defined($cx) || $cx < 0 || $cx > $#$row;
      0        
1352 0         0 \$row->[$cx];
1353             }
1354             # get/set a cell given by (rx,COLSPEC)
1355             sub get($$) {
1356 0     0 1 0 my $self = &__selfmust;
1357 0         0 my $ref = $self->_getref(@_);
1358 0         0 $$ref;
1359             }
1360             sub set($$$) {
1361 0     0 1 0 my $self = &__selfmust;
1362 0         0 my ($rx, $colspec, $newval) = @_;
1363 0         0 my $ref = $self->_getref($rx, $colspec);
1364 0         0 $$ref = $newval;
1365 0         0 $self
1366             }
1367              
1368             sub __validate_sheet_arg($) {
1369 12     12   25 my $sheet = shift;
1370 12 50 33     111 croak "Argument '${\u($sheet)}' is not a Spreadsheet::Edit sheet object"
  0   66     0  
1371             if defined($sheet) and
1372             !blessed($sheet) || !$sheet->isa("Spreadsheet::Edit");
1373 12         34 $sheet;
1374             }
1375              
1376             #---- logging ----------------------------------
1377              
1378             # Print segmented log messages:
1379             # Join args together, prefixing with "> " or ">> " etc.
1380             # unless the previous call did not end with newline.
1381             # Maintains internal state. A final call with an ending \n must occur.
1382             sub _log {
1383 0     0   0 my $self = shift;
1384 0         0 state $in_midst;
1385             print STDERR join "",
1386             ($in_midst ? "" : (">" x ($$self->{cmd_nesting}||1))),
1387 0 0 0     0 map{u} @_;
  0         0  
1388 0         0 $in_midst = ($_[$#_] !~ /\n\z/s);
1389             }
1390              
1391             my $trunclen = 200;
1392             sub fmt_sheet($) {
1393 0     0 0 0 my $sheet = shift;
1394 0 0       0 return "undef" unless defined($sheet);
1395             #oops unless ref($sheet) ne "" && $sheet->isa(__PACKAGE__);
1396 0         0 local $$sheet->{verbose} = 0;
1397 0   0     0 my $desc = $sheet->sheetname() || $sheet->data_source() || "no data_source";
1398 0 0       0 if (length($desc) > $trunclen) { $desc = substr($desc,($trunclen-3))."..." }
  0         0  
1399 0         0 my $r = addrvis($sheet)."($desc)";
1400 0         0 $r
1401             }
1402              
1403             sub _logmethifv { # args: just linearized INPUT items
1404 0 0   0   0 return unless ${$_[0]}->{verbose};
  0         0  
1405 0         0 my $self = shift;
1406 0         0 log_methcall $self, [@_];
1407             }
1408              
1409             sub _logmethretifv { # args: [INPUTS], [OUTPUTS]
1410 8 50   8   27 oops if @_ > 3; # (self, items, retvals)
1411 8 50       14 return unless ${$_[0]}->{verbose};
  8         39  
1412 0         0 goto &log_methcall
1413             }
1414             #-----------------------------------------------
1415              
1416             my $__FILE__ = __FILE__;
1417             my $__PACKAGE__ = __PACKAGE__;
1418              
1419             sub _call_usercode($$$) {
1420 6     6   32 my ($self, $code, $cxlist) = @_;
1421              
1422 6 50       29 if (@$cxlist) {
1423 0         0 my $row = $self->crow();
1424 0         0 for ($row->[$cxlist->[0]]) { # bind $_ to the first-specified column
1425 0         0 eval{ &$code(@$row[@$cxlist]) };
  0         0  
1426             }
1427             } else {
1428 6         13 eval{ $code->() };
  6         23  
1429             ##Simplify backtraces
1430             #@_ = ();
1431             #goto &$code;
1432             }
1433 6 50       1426 if ($@) { # filter out our internal stuff to make easier to read
1434 0         0 { local $_;
  0         0  
1435             $@ =~ s/^[^\n]* called \Kat $__FILE__ line .*?(?=\s*${__PACKAGE__}::\w*apply)/from [apply internals]/msg
1436 0 0       0 unless $$self->{debug};
1437             }
1438 0         0 die "$@\n";
1439             }
1440             }
1441              
1442             # Do apply, handling COLSPEC args.
1443             # If $rxlists or $rxfirst & $rxlast are undef, visit all rows.
1444             sub _apply_to_rows($$$;$$$) {
1445 6     6   28 my ($self, $code, $cxlist, $rxlist, $rxfirst, $rxlast) = @_;
1446 6         21 my $hash = $$self;
1447 6         42 my ($linenums,$rows,$num_cols,$cl) = @$hash{qw/linenums rows num_cols/};
1448              
1449 6 50       33 croak $self->_methmsg("Missing or incorrect {code} argument")
1450             unless ref($code) eq "CODE";
1451 6         30 foreach (@$cxlist) {
1452 0 0 0     0 if ($_ < 0 || $_ >= $num_cols) {
1453 0         0 croak $self->_methmsg("cx $_ is out of range")
1454             }
1455             }
1456              
1457             { # Temp save "current_rx" from an enclosing apply
1458 6         20 local $hash->{current_rx} = undef;
  6         26  
1459              
1460             # Temp update "current apply sheet" for logmsg()
1461 6         25 local $_inner_apply_sheet = $self;
1462              
1463 6 100       25 if (defined $rxlist) {
1464 5         24 foreach my $rx (@$rxlist) {
1465 5 50 33     43 croak "rx $rx is out of range"
1466             if $rx < 0 || $rx > $#$rows;
1467 5         24 $hash->{current_rx} = $rx;
1468 5         54 _call_usercode($self,$code,$cxlist);
1469             }
1470             } else {
1471             # Do not cache $#$rows so user can call insert_rows() or delete_rows()
1472 1   50     17 for (my $rx = $rxfirst // 0;
      33        
      66        
1473             $rx <= $#$rows && (!defined($rxlast) || $rx <= $rxlast);
1474             $rx++)
1475             {
1476 1         4 $hash->{current_rx} = $rx;
1477 1         9 _call_usercode($self,$code,$cxlist);
1478 1         10 $rx = $hash->{current_rx}; # might have been changed by delete_rows()
1479             }
1480             }
1481             }
1482              
1483             croak "After completing apply, an enclosing apply was resumed, but",
1484             " current_rx=",$hash->{current_rx}," now points beyond the last row!\n"
1485 6 50 33     28 if defined($hash->{current_rx}) && $hash->{current_rx} > $#$rows;
1486 6         33 $self
1487             }#_apply_to_rows
1488              
1489             # Rebuild %colx and %colx_desc, and tie any required new variables.
1490             #
1491             # User-defined column aliases must already be valid in %colx;
1492             # all other entries are deleted and re-created.
1493             #
1494             # Note: the special '^', '$' and numieric cx values (if in 0..num_cols)
1495             # are handled algorithmically in _specs2cxdesclist() before consulting %colx.
1496             #
1497             # When building %colx, conflicts are resolved using these priorities:
1498             #
1499             # User-defined aliases (ALWAYS valid)
1500             # Titles (if unique)
1501             # Trimmed titles (with leading & trailing spaces removed)
1502             # Automatic aliases
1503             # ABC letter-codes
1504             #
1505             # Warnings are issued once for each conflict.
1506             sub _rebuild_colx {
1507             my $self = shift;
1508             my $notie = $_[0]; # true during autodetect probing
1509              
1510             my ($silent, $colx, $colx_desc, $useraliases, $num_cols, $title_rx,
1511             $rows, $debug, $pkg2tieall)
1512             = @$$self{qw/silent colx colx_desc useraliases num_cols title_rx
1513             rows debug pkg2tieall/};
1514              
1515             # Save user-defined Aliases before wiping %colx
1516             my %useralias;
1517             foreach my $alias (keys %$useraliases) {
1518             my $cx = $colx->{$alias};
1519             # cx may be undef if referenced column was deleted and/or if
1520             # an alias was created with {optional => TRUE} with non-matching regex.
1521             #next if !defined($cx); # the referenced column was deleted
1522             $useralias{$alias} = [$cx, $colx_desc->{$alias}];
1523             }
1524              
1525             # Now re-generate
1526             %$colx = ();
1527             %$colx_desc = ();
1528              
1529             my sub __putback($$$;$) {
1530             my ($key, $cx, $desc, $nomasking) = @_;
1531             if (defined (my $ocx = $colx->{$key})) {
1532             if ($cx != $ocx) {
1533             oops if $nomasking; # _get_usable_titles should have screen out
1534             $self->_carponce("Warning: ", visq($key), " ($desc) is MASKED BY (",
1535             $colx_desc->{$key}, ")") unless $silent;
1536             }
1537             } else {
1538             oops if exists $colx->{$key};
1539             $colx->{$key} = $cx;
1540             $colx_desc->{$key} = $desc;
1541             }
1542             }
1543              
1544             # Put back the user aliases
1545             while (my ($alias,$aref) = each %useralias) {
1546             my ($cx, $desc) = @$aref;
1547             __putback($alias, $cx, $desc);
1548             }
1549              
1550             if (defined $title_rx) {
1551             # Add non-conflicting titles
1552             my ($normal_titles, $unindexed_titles) = $self->_get_usable_titles;
1553             foreach (@$normal_titles) {
1554             my ($title, $cx) = @$_;
1555             __putback($title, $cx, __fmt_cx($cx).": Title", 1); # nomasking==1
1556             }
1557             # Titles with leading & trailing spaces trimmed off
1558             foreach (@$normal_titles) {
1559             my ($title, $cx) = @$_;
1560             my $key = $title;
1561             $key =~ s/\A\s+//s; $key =~ s/\s+\z//s;
1562             if ($key ne $title) {
1563             __putback($key, $cx, __fmt_cx($cx).": Title sans lead/trailing spaces",1);
1564             }
1565             }
1566             # Automatic aliases
1567             # N.B. These come from all titles, not just "normal" ones
1568             foreach (@$normal_titles, @$unindexed_titles) {
1569             my ($title, $cx) = @$_;
1570             my $ident = title2ident($title);
1571             __putback($ident, $cx, __fmt_cx($cx).": Automatic alias for title");
1572             }
1573             }
1574             my %abc;
1575             foreach my $cx ( 0..$num_cols-1 ) {
1576             my $ABC = cx2let($cx);
1577             __putback($ABC, $cx, "cx $cx: Standard letter-code");
1578             }
1579              
1580             unless ($notie) {
1581             # export and tie newly-defined magic variables to packages which want that.
1582             if (my @pkglist = grep {defined $pkg2tieall->{$_}} keys %$pkg2tieall) {
1583             my @idents = $self->_all_valid_idents;
1584             foreach my $pkg (@pkglist) {
1585             $self->_tie_col_vars($pkg, $pkg2tieall->{$pkg}, @idents);
1586             }
1587             }
1588             }
1589             } # _rebuild_colx
1590              
1591             # Move and/or delete column positions. The argument is a ref to an array
1592             # containing the old column indicies of current (i.e. surviving) columns,
1593             # or undefs for new columns which did not exist previously.
1594             sub _adjust_colx {
1595 0     0   0 my ($self, $old_colxs) = @_;
1596             my ($colx, $colx_desc, $num_cols, $useraliases, $debug)
1597 0         0 = @$$self{qw/colx colx_desc num_cols useraliases debug/};
1598 0 0       0 oops unless @$old_colxs == $num_cols;
1599 0         0 my %old2new;
1600 0         0 foreach my $new_cx (0..$#$old_colxs) {
1601 0         0 my $old_cx = $old_colxs->[$new_cx];
1602 0 0       0 $old2new{$old_cx} = $new_cx if defined $old_cx;
1603             }
1604             # User-defined aliases are for arbitrary columns, so fix them manually
1605 0         0 foreach my $alias (keys %$useraliases) {
1606 0         0 my $cx = $colx->{$alias};
1607 0 0       0 next unless defined $cx; # e.g. non-unique title; see _rebuild_colx()
1608 0 0       0 if (defined (my $new_cx = $old2new{$cx})) {
1609 0 0       0 warn ">adjusting colx{$alias} : $colx->{$alias} -> $new_cx\n" if $debug;
1610 0         0 $colx->{$alias} = $new_cx;
1611             } else {
1612 0 0       0 warn ">deleting colx{$alias} (was $colx->{$alias})\n" if $debug;
1613 0         0 delete $colx->{$alias};
1614 0         0 delete $colx_desc->{$alias};
1615 0         0 delete $useraliases->{$alias};
1616             }
1617             }
1618             # Everything else is derived from actual titles
1619 0         0 $self->_rebuild_colx();
1620             }
1621              
1622             # Translate list of COLSPECs to a list of [cx,desc].
1623             # Regexes may match multiple columns.
1624             # THROWS if a spec does not indicate any existing column.
1625             sub _specs2cxdesclist {
1626 0     0   0 my $self = shift;
1627 0         0 my ($colx, $colx_desc, $num_cols) = @$$self{qw/colx colx_desc num_cols/};
1628 0         0 my @results;
1629 0         0 foreach my $spec (@_) {
1630 0 0       0 croak "Column specifier is undef!" unless defined $spec;
1631 0 0       0 if ($spec eq '^') {
1632 0         0 push @results, [0, "Special '^' specifier for first col"];
1633             next
1634 0         0 }
1635 0 0       0 if ($spec eq '$') {
1636 0         0 push @results, [$num_cols-1, "Special '\$' specifier for last col"];
1637             next
1638 0         0 }
1639 0 0 0     0 if (($spec =~ /^[1-9]\d*$/ || $spec eq "0")
      0        
1640             && $spec <= $num_cols) { # allow one-past-end
1641 0         0 push @results, [$spec, "Numeric column-index"];
1642             next
1643 0         0 }
1644 0 0       0 if (defined (my $cx = $colx->{$spec})) {
1645 0         0 push @results, [$cx, $colx_desc->{$spec}];
1646             next
1647 0         0 }
1648 0 0       0 if (ref($spec) eq 'Regexp') {
1649 0         0 my ($title_rx, $rows) = @$$self{qw/title_rx rows/};
1650 0 0       0 croak "Can not use regex: No title-row is defined!\n"
1651             unless defined $title_rx;
1652 0   0     0 my $title_row = $rows->[$title_rx] // oops;
1653 0         0 my $matched;
1654 0         0 for my $cx (0..$#$title_row) {
1655 0         0 my $title = $title_row->[$cx];
1656             # Note: We can't use /s here! The regex compiler has already
1657             # encapsulated /s or lack thereof in the compiled regex
1658 0 0       0 if ($title =~ /$spec/) {
1659 0         0 push @results, [$cx, "cx $cx: regex matched title '$title'"];
1660 0         0 $matched++;
1661             }
1662             }
1663 0 0       0 if (! $matched) {
1664 0         0 croak "\n--- Title Row (rx $title_rx) ---\n",
1665             vis($title_row),"\n-----------------\n",
1666             "Regex $spec\n",
1667             "does not match any of the titles (see above) in '",
1668             fmt_sheet($self),"'\n"
1669             # N.B. check for "does not match" in alias()
1670             }
1671             next
1672 0         0 }
1673 0         0 croak "Invalid column specifier '${spec}'\nnum_cols=$num_cols. Valid keys are:\n",
1674             $self->_fmt_colx;
1675             }
1676 0 0       0 oops unless wantarray;
1677             @results
1678 0         0 }#_specs2cxdesclist
1679             sub _spec2cx { # return $cx or ($cx, $desc); throws if spec is invalid
1680 0     0   0 my ($self, $spec) = @_;
1681 0         0 my @list = $self->_specs2cxdesclist($spec);
1682 0 0       0 if (@list > 1) {
1683             croak ivis("Regexpr $spec matches multiple titles:\n "),
1684 0         0 join("\n ",map{ vis $_->[1] } @list), "\n";
  0         0  
1685             }
1686 0         0 __first_ifnot_wantarray( @{$list[0]} ) # cx or (cx,desc)
  0         0  
1687             }
1688              
1689             sub _colspec2cx {
1690 0     0   0 my ($self, $colspec) = @_;
1691 0 0       0 croak "COLSPEC may not be a regex" if ref($colspec) eq 'Regexp';
1692 0         0 goto &_spec2cx
1693             }
1694              
1695             # The user-callable API
1696             # THROWS if a spec does not indicate any existing column.
1697             # Can return multiple results due to multple args and/or Regexp multimatch
1698             # In scalar context returns the first result.
1699             sub spectocx(@) { # the user-callable API
1700 0     0 1 0 my $self = &__selfmust;
1701 0         0 my @list = $self->_specs2cxdesclist(@_);
1702 0         0 __first_ifnot_wantarray( map{ $_->[0] } @list )
  0         0  
1703             }
1704              
1705             # Translate a possibly-relative column specification which
1706             # indicate 1 off the end.
1707             #
1708             # The specification may be
1709             # >something (the column after 'something')
1710             # or
1711             # an absolute column indicator (cx or ABC), possibly 1 off the end
1712             # or
1713             # refer to an existing column
1714             #
1715             sub _relspec2cx {
1716 0     0   0 my ($self, $spec) = @_;
1717 0         0 my $colx = $$self->{colx};
1718 0 0       0 if ($spec =~ /^>(.*)/) {
1719 0         0 my $cx = $self->_colspec2cx($1); # croaks if not an existing column
1720 0         0 return $cx + 1
1721             }
1722 0         0 $self->_colspec2cx($spec); # croaks if not an existing column
1723             }
1724              
1725             sub alias(@) {
1726 0     0 1 0 my $self = &__selfmust;
1727 0 0       0 my $opthash = ref($_[0]) eq 'HASH' ? shift() : {};
1728 0 0       0 if ($opthash) {
1729 0         0 __validate_opthash($opthash, [qw(optional)],
1730             desc => "alias option");
1731             }
1732 0 0       0 croak "'alias' expects an even number of arguments\n"
1733             unless scalar(@_ % 2)==0;
1734              
1735             my ($colx, $colx_desc, $num_cols, $useraliases, $rows, $silent, $debug)
1736 0         0 = @$$self{qw/colx colx_desc num_cols useraliases rows silent debug/};
1737              
1738 0         0 my ($file, $lno) = __fn_ln_methname();
1739              
1740 0         0 my @cxlist;
1741 0         0 while (@_) {
1742 0         0 my $ident = _validate_ident( shift @_ );
1743 0         0 my $spec = shift @_;
1744              
1745 0 0       0 if (my $wheredefined = $useraliases->{$ident}) {
1746 0         0 croak "'$ident' is already a user-defined alias",
1747             " for cx ", scalar($self->_spec2cx($ident)),
1748             " defined at ", $wheredefined, "\n"
1749             #" (for cx ", scalar($self->_spec2cx($ident)), ")"
1750             }
1751             croak "'$ident' is already a user-defined alias (for cx ",
1752             scalar($self->_spec2cx($ident)), ")"
1753 0 0       0 if $useraliases->{$ident};
1754              
1755 0         0 my $cx = eval{ $self->_spec2cx($spec) };
  0         0  
1756 0 0       0 unless(defined $cx) {
1757 0 0       0 oops unless $@;
1758 0 0 0     0 croak $@ unless $opthash->{optional} && $@ =~ /does not match/is;
1759             # Always throw on other errors, e.g. regex matches more than one title
1760             };
1761              
1762             # Log each pair individually
1763 0         0 $self->_logmethifv($opthash, \", $ident => ",\__fmt_colspec_cx($spec,$cx));
1764              
1765 0         0 $colx->{$ident} = $cx;
1766 0         0 $colx_desc->{$ident} = "alias for ".__fmt_cx($cx)." (".quotekey($spec).")";
1767 0         0 $useraliases->{$ident} = "${file}:$lno";
1768 0         0 push @cxlist, $cx;
1769             }
1770 0         0 $self->_rebuild_colx();
1771              
1772 0         0 __first_ifnot_wantarray( @cxlist )
1773             }#alias
1774              
1775             sub unalias(@) {
1776 0     0 1 0 my $self = &__selfmust;
1777 0 0       0 croak __methname, " does not accept an {OPTIONS} hash\n"
1778             if ref($_[0]) eq 'HASH';
1779              
1780             my ($colx, $colx_desc, $useraliases)
1781 0         0 = @$$self{qw/colx colx_desc useraliases/};
1782              
1783 0         0 foreach (@_) {
1784 0   0     0 delete $useraliases->{$_} // croak "unalias: '$_' is not a column alias\n";
1785 0         0 $self->_logmethifv(\" Removing alias $_ => ", \$colx_desc->{$_});
1786 0   0     0 delete $colx->{$_} // oops;
1787 0   0     0 delete $colx_desc->{$_} // oops;
1788             }
1789 0         0 $self->_rebuild_colx();
1790 0         0 $self
1791             }
1792              
1793             # title_rx: Get/set the title row index
1794             #
1795             # $rx = title_rx ; # Retrieve
1796             #
1797             # title_rx undef # Set to no titles
1798             #
1799             # title_rx ROWINDEX # Set to specified rx
1800             #
1801             # title_rx 'auto' # Auto-detect the title row; an exception is thrown
1802             # # if a plausible row is not found.
1803             #
1804             # {OPTARGS} may contain
1805             # verbose, silent, debug (temporarily override the object's settings)
1806             #
1807             # Auto-detect options:
1808             # required => [COLSPEC, ...] # required titles
1809             # min_rx, max_rx => NUM # range of rows which may contain the row
1810             # first_cx => NUM # first column ix which must contain a valid title
1811             # last_cx => NUM # last column ix which must contain a valid title
1812             #
1813             # Note: This is called internally by read_spreadsheet(), passing 'auto'
1814             # by default. Therefore users need not call this method explicitly
1815             # except to change title row or if read_spreadsheet was not used at all.
1816             #
1817             sub title_rx(;$@) {
1818 8     8 1 47 my ($self, $opthash_arg) = &__selfmust_opthash;
1819 8         43 my $opthash = { %$opthash_arg }; # make copy so we can modify it
1820 8         21 my @orig_args = @_;
1821              
1822 8         33 my $saved_stdopts = $self->_set_stdopts($opthash);
1823 8     8   95 scope_guard{ $self->_restore_stdopts($saved_stdopts) };
  8         37  
1824 8         56 __validate_opthash( $opthash,
1825             [qw(required min_rx max_rx first_cx last_cx)],
1826             desc => "autodetect option",
1827             undef_ok => [] );
1828 8         22 my $rx = -999;
1829 8 100       35 if (@_ == 0) {
1830             # A return value was requested
1831 2 50       17 croak '{OPTARGS} passed to title_rx with no operator (get request?)'
1832             if %$opthash;
1833 2         17 $rx = $$self->{title_rx};
1834 2         29 $self->_logmethretifv([], [$rx]);
1835             } else {
1836             # N.B. undef arg means there are no titles
1837 6         10 $rx = shift;
1838 6 100       12 my $notie; $notie = shift() if u($_[0]) eq "_notie"; # during auto-detect probes
  6         39  
1839 6 50       44 croak "Extraneous argument(s) to title_rx: ".avis(@_) if @_;
1840              
1841 6 50       25 if (defined $rx) {
1842 6 100       48 if ($rx eq 'auto') {
    50          
    50          
1843 1         5 $rx = $self->_autodetect_title_rx($opthash);
1844             }
1845             elsif ($rx !~ /^\d+$/) {
1846 0         0 croak "Invalid title_rx argument: ", visq($rx);
1847             }
1848 5         24 elsif ($rx > $#{ $$self->{rows} }) {
1849 0         0 croak "Rx $rx is beyond the end of the data", visq($rx);
1850             }
1851             }
1852 6         18 $$self->{title_rx} = $rx;
1853 6         27 $self->_logmethretifv([$opthash_arg, @orig_args], [$rx]);
1854 6         36 $self->_rebuild_colx($notie);
1855             }
1856 8         43 $rx;
1857             }#title_rx
1858              
1859             sub _autodetect_title_rx {
1860 1     1   2 my ($self, $opthash) = @_;
1861              
1862             my ($title_rx, $rows, $colx, $num_cols, $verbose, $debug) =
1863 1         5 @$$self{qw(title_rx rows colx num_cols verbose debug)};
1864              
1865 1 50       4 if ($#$rows == -1) {
1866 0         0 return undef; # completely empty
1867             }
1868              
1869             # Filter out titles which can not be used as a COLSPEC
1870             my @required_specs = $opthash->{required}
1871 1 50       12 ? to_array($opthash->{required}) : ();
1872 1 50       5 croak "undef value in {required}" if grep{! defined} @required_specs;
  0         0  
1873 1         2 @required_specs = grep{ !__unindexed_title($_, $num_cols) } @required_specs;
  0         0  
1874              
1875 1   50     8 my $min_rx = __validate_nonnegi($opthash->{min_rx}//0, "min_rx");
1876 1   33     11 my $max_rx = __validate_nonnegi($opthash->{max_rx}//$min_rx+3, "max_rx");
1877 1 50       4 $max_rx = $#$rows if $max_rx > $#$rows;
1878              
1879 1   50     14 my $first_cx = __validate_nonnegi($opthash->{first_cx}//0, "first_cx");
1880 1   50     5 my $last_cx = __validate_nonnegi($opthash->{last_cx}//INT_MAX, "last_cx");
1881 1 50       17 $last_cx = $num_cols-1 if $last_cx >= $num_cols;
1882              
1883 1         3 my @nd_reasons;
1884 1 50       4 if ($min_rx > $#$rows) {
    50          
1885 0         0 push @nd_reasons, "min_rx ($min_rx) is out of range";
1886             }
1887             elsif ($min_rx > $max_rx) {
1888 0         0 push @nd_reasons,
1889             "min_rx ($min_rx) is greater than max_rx ($max_rx)"
1890             }
1891 1 50       4 if ($first_cx >= $num_cols) {
    50          
1892 0         0 push @nd_reasons, "first_cx ($first_cx) is out of range"
1893             }
1894             elsif ($first_cx > $last_cx) {
1895 0         0 push @nd_reasons,
1896             "first_cx ($first_cx) is less than last_cx ($last_cx)"
1897             }
1898              
1899 1         2 my $detected;
1900 1 50       4 unless (@nd_reasons) {
1901             # no logging during trial and error (except with debug)
1902 1         4 local $$self->{verbose} = $debug;
1903 1         4 local $$self->{silent} = !$debug;
1904 1         3 RX: for my $rx ($min_rx .. $max_rx) {
1905 1 50 33     4 warn "# ",$nd_reasons[-1],"\n" if $debug && @nd_reasons;
1906 1 50       4 warn ivis '#autodetect: Trying RX $rx ...\n' if $debug;
1907              
1908             # Make $rx the title_rx so __specs2cxdesclist() can be used
1909             # e.g. to handle regex COLSPECS. Pass special option to not tie
1910             # user variables yet.
1911 1         6 $self->title_rx($rx, "_notie");
1912 1 50       7 oops unless $rx == $$self->{title_rx};
1913              
1914 1         5 my $row = $rows->[$rx];
1915 1         5 for my $cx ($first_cx .. $last_cx) {
1916 2 50       5 if ($row->[$cx] eq "") {
1917 0         0 push @nd_reasons, "rx $rx: col ".__fmt_cx($cx)." is empty";
1918 0         0 next RX;
1919             }
1920             }
1921 1         3 foreach my $spec (@required_specs) {
1922 0         0 my @list; # A regex might match multiple titles
1923 0         0 eval { @list = $self->_specs2cxdesclist($spec) };
  0         0  
1924 0 0       0 warn ivis ' found $spec in @list\n' if $debug;
1925 0 0       0 if (@list == 0) {
1926 0         0 push @nd_reasons, ivis 'rx $rx: Required column \'$spec\' not found';
1927             next RX
1928 0         0 }
1929 0 0       0 my @shortlist = grep{ $_->[0] >= $first_cx && $_->[0] <= $last_cx }
  0         0  
1930             @list;
1931 0 0       0 if (@shortlist == 0) {
1932 0         0 push @nd_reasons, ivis 'rx $rx: Matched \'$spec\' but in unacceptable cx '.avisl(map{$_->[0]} @list);
  0         0  
1933             next RX
1934 0         0 }
1935 0 0       0 if (! grep{ $_->[1] =~ /title/i } @shortlist) {
  0         0  
1936             ### ??? Can this actually happen ???
1937 0         0 push @nd_reasons, ivis 'rx $rx: \'$spec\' resolved to something other than a title: '.__fmt_pairs(@shortlist);
1938 0         0 next RX;
1939             }
1940 0 0       0 warn ivis ' <<cx is within $first_cx .. $last_cx>>\n' if $debug;
1941             }
1942 1         2 $detected = $rx;
1943             last
1944 1         2 }
1945 1         5 $$self->{title_rx} = undef; # will re-do below
1946             }
1947 1 50       4 if (defined $detected) {
1948 1 50       3 if ($verbose) { # should be $debug ??
1949 0         0 my ($fn, $lno, $methname) = __fn_ln_methname();
1950 0         0 print STDERR "[Auto-detected title_rx = $detected at ${fn}:$lno]\n";
1951             }
1952 1         4 local $$self->{cmd_nesting} = $$self->{cmd_nesting} + 1;
1953 1         3 local $$self->{verbose} = 0; # suppress normal logging
1954 1         13 $self->title_rx($detected); # shows collision warnings unless {silent}
1955 1         7 return $detected;
1956             } else {
1957 0 0       0 if (@nd_reasons == 0) {
1958 0         0 push @nd_reasons, ivis '(BUG?) No rows checked! num_cols=$num_cols rows=$$self->{rows}'.dvis '\n##($min_rx $max_rx $first_cx $last_cx)' ;
1959             }
1960 0         0 croak("In ",qsh(fmt_sheet($self))," ...\n",
1961             " Auto-detect of title_rx with options ",vis($opthash),
1962             dvis ' @required_specs\n',
1963             " failed because:\n ", join("\n ",@nd_reasons),
1964             "\n"
1965             );
1966             }
1967             }
1968              
1969             sub first_data_rx(;$) {
1970 0     0 1 0 my $self = &__self;
1971 0         0 my $first_data_rx = $$self->{first_data_rx};
1972 0 0       0 if (@_ == 0) { # 'get' request
1973 0         0 $self->_logmethretifv([], [$first_data_rx]);
1974 0         0 return $first_data_rx;
1975             }
1976 0         0 my $rx = __validate_nonnegi_or_undef( shift() );
1977 0         0 $self->_logmethretifv([$rx]);
1978             # Okay if this points to one past the end
1979 0 0       0 $self->_check_rx($rx, 1) if defined $rx; # one_past_end_ok=1
1980 0         0 $$self->{first_data_rx} = $rx;
1981 0         0 $self
1982             }
1983             sub last_data_rx(;$) {
1984 0     0 1 0 my $self = &__self;
1985 0         0 my $last_data_rx = $$self->{last_data_rx};
1986 0 0       0 if (@_ == 0) { # 'get' request
1987 0         0 $self->_logmethretifv([], [$last_data_rx]);
1988 0         0 return $last_data_rx;
1989             }
1990 0         0 my $rx = __validate_nonnegi_or_undef( shift() );
1991 0         0 $self->_logmethretifv([$rx]);
1992 0 0       0 if (defined $rx) {
1993 0         0 $self->_check_rx($rx, 1); # one_past_end_ok=1
1994             confess "last_data_rx must be >= first_data_rx"
1995 0 0 0     0 unless $rx >= ($$self->{first_data_rx}//0);
1996             }
1997 0         0 $$self->{last_data_rx} = $rx;
1998 0         0 $self
1999             }
2000              
2001             # move_cols ">COLSPEC",source cols...
2002             # move_cols "absolute-position",source cols...
2003             sub move_cols($@) {
2004 0     0 1 0 my $self = &__selfmust;
2005 0         0 my ($posn, @sources) = @_;
2006              
2007 0         0 my ($num_cols, $rows) = @$$self{qw/num_cols rows/};
2008              
2009 0         0 my $to_cx = $self->_relspec2cx($posn);
2010              
2011 0         0 my @source_cxs = map { scalar $self->_spec2cx($_) } @sources;
  0         0  
2012 0         0 my @source_cxs_before = grep { $_ < $to_cx } @source_cxs;
  0         0  
2013 0         0 my $insert_offset = $to_cx - scalar(@source_cxs_before);
2014 0         0 my @rsorted_source_cxs = sort { $b <=> $a } @source_cxs;
  0         0  
2015              
2016             $self->_logmethifv(\__fmt_colspec_cx($posn,$to_cx), \" <-- ",
2017 0         0 \join(" ",map{"$source_cxs[$_]\[$_\]"} 0..$#source_cxs));
  0         0  
2018              
2019 0 0       0 croak "move destination is too far to the right\n"
2020             if $to_cx + @sources - @source_cxs_before > $num_cols;
2021              
2022 0         0 my @old_cxs = (0..$num_cols-1);
2023              
2024 0         0 foreach my $row (@$rows, \@old_cxs) {
2025 0         0 my @moving_cells = @$row[@source_cxs]; # save
2026 0         0 splice @$row, $_, 1 foreach (@rsorted_source_cxs); # delete
2027 0         0 splice @$row, $insert_offset, 0, @moving_cells; # put back
2028             };
2029              
2030 0         0 $self->_adjust_colx(\@old_cxs);
2031 0         0 $self
2032             }
2033 0     0 1 0 sub move_col($$) { goto &move_cols; }
2034              
2035             # insert_cols ">COLSPEC",new titles (or ""s or undefs if no title row)
2036             # insert_cols "absolute-position",...
2037             # RETURNS: The new colum indicies, or in scalar context the first cx
2038             sub insert_cols($@) {
2039 0     0 1 0 my $self = &__selfmust;
2040 0         0 my ($posn, @new_titles) = @_;
2041 0         0 my ($num_cols, $rows, $title_rx) = @$$self{qw/num_cols rows title_rx/};
2042              
2043 0         0 my $to_cx = $self->_relspec2cx($posn);
2044              
2045 0         0 $self->_logmethifv(\__fmt_colspec_cx($posn,$to_cx), \" <-- ", \avis(@new_titles));
2046              
2047 0   0     0 @new_titles = map { $_ // "" } @new_titles; # change undef to ""
  0         0  
2048 0     0   0 my $have_new_titles = first { $_ ne "" } @new_titles;
  0         0  
2049 0 0 0     0 if (!defined($title_rx) && $have_new_titles) {
2050 0         0 croak "insert_cols: Can not specify non-undef titles if title_rx is not defined\n"
2051             }
2052 0         0 my $num_insert_cols = @new_titles;
2053              
2054 0         0 foreach my $row (@$rows) {
2055 0 0 0     0 if (defined $title_rx && $row == $rows->[$title_rx]) {
2056 0         0 splice @$row, $to_cx, 0, @new_titles;
2057             } else {
2058 0         0 splice @$row, $to_cx, 0, (("") x $num_insert_cols);
2059             }
2060             }
2061 0         0 $$self->{num_cols} += $num_insert_cols;
2062              
2063 0         0 $self->_adjust_colx(
2064             [ 0..$to_cx-1, ((undef) x $num_insert_cols), $to_cx..$num_cols-1 ]
2065             );
2066              
2067 0         0 __first_ifnot_wantarray( $to_cx .. $to_cx+$num_insert_cols-1 )
2068             }
2069 0     0 1 0 sub insert_col($$) { goto &insert_cols }
2070              
2071             # sort_rows {compare function}
2072             # sort_rows {compare function} $first_rx, $last_rx
2073             sub sort_rows(&) {
2074 0     0 1 0 my $self = &__selfmust;
2075 0 0       0 croak "bad args" unless @_ == 1;
2076 0         0 my ($cmpfunc, $first_rx, $last_rx) = @_;
2077              
2078             my ($rows, $linenums, $title_rx, $first_data_rx, $last_data_rx)
2079 0         0 = @$$self{qw/rows linenums title_rx first_data_rx last_data_rx/};
2080              
2081 0 0 0     0 $first_rx //= $first_data_rx
      0        
2082             // (defined($title_rx) ? $title_rx+1 : 0);
2083 0   0     0 $last_rx //= $last_data_rx // $#$rows;
      0        
2084              
2085 0         0 $self->_logmethifv(\"(sorting rx ${first_rx}..${last_rx})");
2086              
2087 0 0       0 oops unless defined($first_rx);
2088 0 0       0 oops unless defined($last_rx);
2089 0         0 my $pkg = caller;
2090             my @indicies = sort {
2091 0         0 my @row_indicies = ($a, $b);
  0         0  
2092 3     3   31 no strict 'refs';
  3         6  
  3         23186  
2093 0         0 local ${ "$pkg\::a" } = $rows->[$a]; # actual row objects
  0         0  
2094 0         0 local ${ "$pkg\::b" } = $rows->[$b];
  0         0  
2095 0         0 $cmpfunc->(@row_indicies)
2096             } ($first_rx..$last_rx);
2097              
2098 0         0 @$rows[$first_rx..$#$rows] = @$rows[@indicies];
2099 0         0 @$linenums[$first_rx..$#$rows] = @$linenums[@indicies];
2100              
2101 0         0 __validate_not_scalar_context(0..$first_rx-1, @indicies, $last_rx+1..$#$rows)
2102             }
2103              
2104             sub delete_cols(@) {
2105 0     0 1 0 my $self = &__selfmust;
2106 0         0 my (@cols) = @_;
2107 0         0 my ($num_cols, $rows) = @$$self{qw/num_cols rows/};
2108              
2109 0         0 my @cxlist = $self->_colspecs_to_cxs_ckunique(\@cols);
2110              
2111 0         0 my @reverse_cxs = sort { $b <=> $a } @cxlist;
  0         0  
2112              
2113 0         0 $self->_logmethifv(reverse @reverse_cxs);
2114 0         0 my @old_cxs = (0..$num_cols-1);
2115 0         0 for my $row (@$rows, \@old_cxs) {
2116 0         0 foreach my $cx (@reverse_cxs) {
2117 0 0       0 oops if $cx > $#$row;
2118 0         0 splice @$row, $cx, 1, ();
2119             }
2120             }
2121 0         0 $$self->{num_cols} -= @reverse_cxs;
2122 0         0 $self->_adjust_colx(\@old_cxs);
2123 0         0 $self
2124             }
2125 0     0 1 0 sub delete_col($) { goto &delete_cols; }
2126              
2127             # Logic which forces verbose on when debug is on, etc.
2128             # Used by new() and options()
2129             sub _set_verbose_debug_silent(@) {
2130 6     6   11 my $self = shift;
2131 6         42 foreach (pairs @_) {
2132 12         29 my ($key, $val) = @$_;
2133 12         25 my $oldval = $$self->{$key};
2134             next
2135 12 50       37 unless !!$oldval != !!$val;
2136 0 0       0 if ($key eq "silent") {
    0          
    0          
2137 0         0 $$self->{$key} = $val;
2138             }
2139             elsif ($key eq "verbose") {
2140 0 0       0 if ($val) {
2141 0         0 $$self->{saved_silent} = $$self->{silent};
2142 0         0 $$self->{silent} = 0; #?? might still want to suppress warnings
2143             } else {
2144 0         0 $$self->{silent} = delete $$self->{saved_silent};
2145             }
2146             }
2147             elsif ($key eq "debug") {
2148 0 0       0 if ($val) {
2149 0         0 $$self->{saved_verbose} = $$self->{verbose};
2150 0         0 $$self->{saved_silent} = $$self->{silent};
2151 0         0 $$self->{silent} = 0;
2152 0         0 $$self->{verbose} = "forced by {debug}";
2153             } else {
2154 0         0 $$self->{verbose} = delete $$self->{saved_verbose};
2155 0         0 $$self->{silent} = delete $$self->{saved_silent};
2156             }
2157             }
2158 0         0 else { confess "options: Unknown option key '$key'\n"; }
2159 0         0 $$self->{$key} = $val;
2160             }
2161             }
2162              
2163             # Get or set option(s).
2164             # New settings may be in an {OPTIONS} hash and/or linear args.
2165             # Always returns the old options (key => value pairs).
2166             sub options(@) {
2167 0     0 1 0 my $self = &__self; # auto-create sheet if necessary
2168 0 0       0 my @old = map{ exists($$self->{$_}) ? ($_ => $$self->{$_}) : () }
  0         0  
2169             qw/verbose debug silent/;
2170            
2171 0         0 my %eff_args;
2172 0 0       0 if (@_ == 0) {
2173 0 0       0 croak "(list) returned but called in scalar or void context"
2174             unless wantarray;
2175             } else {
2176 0         0 my $opthash = &__opthash; # shift off 1st arg iff it is a hashref
2177 0         0 %eff_args = (%$opthash, &__validate_pairs);
2178 0         0 $self->_set_verbose_debug_silent(%eff_args);
2179             }
2180 0         0 $self->_logmethretifv([\__fmt_pairlist(%eff_args)], [\hvis(@old)]);
2181 0         0 @old;
2182             }
2183              
2184             sub _colspecs_to_cxs_ckunique {
2185 0 0   0   0 my ($self, $colspecs) = @_; oops unless @_==2;
  0         0  
2186 0         0 my @cxlist;
2187             my %seen;
2188 0         0 foreach (@$colspecs) {
2189 0         0 my $cx = $self->_spec2cx($_); # auto-detects title_rx if needed
2190 0 0       0 if ($seen{$cx}) {
2191 0         0 croak "cx $cx is specified by multiple COLSPECs: ", vis($_)," and ",vis($seen{$cx}),"\n";
2192             }
2193 0         0 $seen{ $cx } = $_;
2194 0         0 push @cxlist, $cx;
2195             }
2196             @cxlist
2197 0         0 }
2198              
2199             sub only_cols(@) {
2200 0     0 1 0 my $self = &__selfmust;
2201 0         0 my @cols = @_;
2202 0         0 my $rows = $self->rows;
2203              
2204             # Replace each row with just the surviving columns, in the order specified
2205 0         0 my @cxlist = $self->_colspecs_to_cxs_ckunique(\@cols);
2206 0         0 for my $row (@$rows) {
2207 0         0 @$row = map{ $row->[$_] } @cxlist;
  0         0  
2208             }
2209 0         0 $$self->{num_cols} = scalar(@cxlist);
2210 0         0 $self->_adjust_colx(\@cxlist);
2211 0         0 $self
2212             }
2213              
2214             # obj->join_cols separator_or_coderef, colspecs...
2215             # If coderef:
2216             # $_ is bound to the first-named column, and is the destination
2217             # @_ is bound to all named columns, in the order named.
2218             sub join_cols(&@) {
2219 0     0 1 0 my $self = &__selfmust;
2220 0         0 my ($separator, @sources) = @_;
2221 0         0 my $hash = $$self;
2222              
2223 0         0 my ($num_cols, $rows) = @$hash{qw/num_cols rows/};
2224              
2225 0         0 my @source_cxs = map { scalar $self->_spec2cx($_) } @sources;
  0         0  
2226             $self->_logmethifv(\"'$separator' ",
2227 0         0 \join(" ",map{"$source_cxs[$_]\[$_\]"} 0..$#source_cxs));
  0         0  
2228              
2229 0         0 my $saved_v = $hash->{verbose}; $hash->{verbose} = 0;
  0         0  
2230              
2231             # Merge the content into the first column. N.B. EXCLUDES title row.
2232             my $code = ref($separator) eq 'CODE'
2233             ? $separator
2234 0 0   0   0 : sub{ $_ = join $separator, @_ } ;
  0         0  
2235              
2236             # Note first/last_data_rx are ignored
2237 0   0     0 { my $first_rx = ($hash->{title_rx} // -1)+1;
  0         0  
2238 0         0 _apply_to_rows($self, $code, \@source_cxs, undef, $first_rx, undef);
2239             }
2240              
2241             # Delete the other columns
2242 0         0 $self->delete_cols(@source_cxs[1..$#source_cxs]);
2243              
2244 0         0 $$self->{verbose} = $saved_v;
2245 0         0 $self
2246             }
2247 0     0 1 0 sub join_cols_sep($@) { goto &join_cols } # to match the functional API
2248              
2249             sub rename_cols(@) {
2250 0     0 1 0 my $self = &__selfmust;
2251 0 0       0 croak "rename_cols expects an even number of arguments\n"
2252             unless scalar(@_ % 2)==0;
2253              
2254 0         0 my ($num_cols, $rows, $title_rx) = @$$self{qw/num_cols rows title_rx/};
2255              
2256 0 0       0 croak "rename_cols: No title_rx is defined!\n"
2257             unless defined $title_rx;
2258              
2259 0         0 my $title_row = $rows->[$title_rx];
2260              
2261 0         0 while (@_) {
2262 0         0 my $old_title = shift @_;
2263 0         0 my $new_title = shift @_;
2264 0         0 my $cx = $self->_spec2cx($old_title);
2265 0         0 $self->_logmethifv($old_title, \" -> ", $new_title, \" [cx $cx]");
2266 0 0       0 croak "rename_cols: Column $old_title is too large\n"
2267             if $cx > $#$title_row; # it must have been an absolute form
2268 0         0 $title_row->[$cx] = $new_title;
2269              
2270             # N.B. aliases remain pointing to the same columns regardless of names
2271             }
2272 0         0 $self->_rebuild_colx();
2273 0         0 $self
2274             }
2275              
2276             # apply {code}, colspec*
2277             # @_ are bound to the columns in the order specified (if any)
2278             # $_ is bound to the first such column
2279             # Only visit rows bounded by first_data_rx and/or last_data_rx,
2280             # starting with title_rx+1 if a title row is defined.
2281             sub apply(&;@) {
2282 1     1 1 6 my $self = &__selfmust;
2283 1         8 my ($code, @cols) = @_;
2284 1         5 my $hash = $$self;
2285 1         8 my @cxs = map { scalar $self->_spec2cx($_) } @cols;
  0         0  
2286              
2287 1   50     30 my $first_rx = max(($hash->{title_rx} // -1)+1, $hash->{first_data_rx}//0);
      50        
2288              
2289 1         7 @_ = ($self, $code, \@cxs, undef, $first_rx, $hash->{last_data_rx});
2290 1         12 goto &_apply_to_rows
2291             }
2292              
2293             # apply_all {code}, colspec*
2294             # Like apply, but ALL rows are visited, inluding the title row if any
2295             sub apply_all(&;@) {
2296 0     0 1 0 my $self = &__selfmust;
2297 0         0 my ($code, @cols) = @_;
2298 0         0 my $hash = $$self;
2299 0         0 my @cxs = map { scalar $self->_spec2cx($_) } @cols;
  0         0  
2300 0         0 log_methcall $self, [\"rx 0..",$#{$hash->{rows}},
2301             @cxs > 0 ? \(" cxs=".avis(@cxs)) : ()]
2302 0 0       0 if $$self->{verbose};
    0          
2303 0         0 @_ = ($self, $code, \@cxs);
2304 0         0 goto &_apply_to_rows
2305             }
2306              
2307             sub __arrify_checknotempty($) {
2308 5     5   23 local $_ = shift;
2309 5 100       48 my $result = ref($_) eq 'ARRAY' ? $_ : [ $_ ];
2310             croak "Invalid argument ",vis($_)," (expecting [array ref] or single value)\n"
2311 5 50 33     66 unless @$result > 0 && !grep{ref($_) || $_ eq ""} @$result;
  5 50       80  
2312 5         20 $result
2313             }
2314              
2315             # apply_torx {code} rx, colspec*
2316             # apply_torx {code} [rx list], colspec*
2317             # Only the specified row(s) are visited
2318             # first/last_data_rx are ignored.
2319             sub apply_torx(&$;@) {
2320 5     5 1 565 my $self = &__selfmust;
2321 5         31 my ($code, $rxlist_arg, @cols) = @_;
2322 5 50       47 croak "Missing rx (or [list of rx]) argument\n" unless defined $rxlist_arg;
2323 5         49 my $rxlist = __arrify_checknotempty($rxlist_arg);
2324 5         21 my @cxs = map { scalar $self->_spec2cx($_) } @cols;
  0         0  
2325             log_methcall $self, [\vis($rxlist_arg),
2326             @cxs > 0 ? \(" cxs=".avis(@cxs)) : ()]
2327 5 0       45 if $$self->{verbose};
    50          
2328 5         34 @_ = ($self, $code, \@cxs, $rxlist);
2329 5         62 goto &_apply_to_rows
2330             }
2331              
2332             # apply_exceptrx {code} [rx list], colspec*
2333             # All rows EXCEPT the specified rows are visited
2334             sub apply_exceptrx(&$;@) {
2335 0     0 1 0 my $self = &__selfmust;
2336 0         0 my ($code, $exrxlist_arg, @cols) = @_;
2337 0 0       0 croak "Missing rx (or [list of rx]) argument\n" unless defined $exrxlist_arg;
2338 0         0 my $exrxlist = __arrify_checknotempty($exrxlist_arg);
2339 0         0 my @cxs = map { scalar $self->_spec2cx($_) } @cols;
  0         0  
2340             log_methcall $self, [\vis($exrxlist_arg),
2341             @cxs > 0 ? \(" cxs=".avis(@cxs)) : ()]
2342 0 0       0 if $$self->{verbose};
    0          
2343 0         0 my $hash = $$self;
2344 0         0 my $max_rx = $#{ $hash->{rows} };
  0         0  
2345 0         0 foreach (@$exrxlist) {
2346 0 0 0     0 croak "rx $_ is out of range\n" if $_ < 0 || $_ > $max_rx;
2347             }
2348 0         0 my %exrxlist = map{ $_ => 1 } @$exrxlist;
  0         0  
2349 0         0 my $rxlist = [ grep{ ! exists $exrxlist{$_} } 0..$max_rx ];
  0         0  
2350 0         0 @_ = ($self, $code, \@cxs, $rxlist);
2351 0         0 goto &_apply_to_rows
2352             }
2353              
2354             # split_col {code} oldcol, newcol_start_position, new titles...
2355             # {code} is called for each row with $_ bound to <oldcol>
2356             # and @_ bound to the new column(s).
2357             # The old column is left as-is (not deleted).
2358             sub split_col(&$$$@) {
2359 0     0 1 0 my $self = &__selfmust;
2360 0         0 my ($code, $oldcol_posn, $newcols_posn, @new_titles) = @_;
2361              
2362 0         0 my $num_insert_cols = @new_titles;
2363 0         0 my $old_cx = $self->_spec2cx($oldcol_posn);
2364 0         0 my $newcols_first_cx = $self->_relspec2cx($newcols_posn);
2365              
2366             log_methcall $self, [\"... $oldcol_posn\[$old_cx] -> [$newcols_first_cx]",
2367             avis(@new_titles)]
2368 0 0       0 if $$self->{verbose};
2369              
2370 0         0 local $$self->{verbose} = 0;
2371              
2372 0         0 $self->insert_cols($newcols_first_cx, @new_titles);
2373              
2374 0 0       0 $old_cx += $num_insert_cols if $old_cx >= $newcols_first_cx;
2375              
2376 0         0 $self->apply($code,
2377             $old_cx, $newcols_first_cx..$newcols_first_cx+$num_insert_cols-1);
2378              
2379 0         0 $self
2380             }
2381              
2382             sub reverse_cols() {
2383 0     0 1 0 my $self = &__selfmust;
2384 0         0 my ($rows, $num_cols) = @$$self{qw/rows num_cols/};
2385 0 0       0 log_methcall $self, [] if $$self->{verbose};
2386 0         0 for my $row (@$rows) {
2387 0         0 @$row = reverse @$row;
2388             }
2389 0         0 $self->_adjust_colx([reverse 0..$num_cols-1]);
2390 0         0 $self
2391             }
2392              
2393             sub transpose() {
2394 0     0 1 0 my $self = &__selfmust;
2395 0 0       0 log_methcall $self, [] if $$self->{verbose};
2396              
2397 0         0 my ($rows, $old_num_cols, $linenums) = @$$self{qw/rows num_cols linenums/};
2398              
2399 0         0 $$self->{useraliases} = {};
2400 0         0 $$self->{title_rx} = undef;
2401 0         0 $$self->{first_data_rx} = undef;
2402 0         0 $$self->{last_data_rx} = undef;
2403              
2404             # Save a copy of the data
2405 0         0 my @old_rows = ( map{ [ @$_ ] } @$rows );
  0         0  
2406              
2407             # Rebuild the spreadsheet
2408 0         0 @$rows = ();
2409 0         0 $$self->{num_cols} = scalar @old_rows;
2410              
2411 0         0 for (my $ocx=0; $ocx < $old_num_cols; ++$ocx) {
2412 0         0 my @nrow;
2413 0         0 for my $row (@old_rows) {
2414 0   0     0 push @nrow, $row->[$ocx] // "";
2415             }
2416 0         0 push @$rows, \@nrow;
2417             }
2418 0 0       0 if ($$self->{saved_linenums}) {
2419 0         0 @$linenums = @{ $$self->{saved_linenums} };
  0         0  
2420 0         0 delete $$self->{saved_linenums};
2421             } else {
2422 0         0 $$self->{saved_linenums} = [ @$linenums ];
2423 0         0 @$linenums = ("?") x scalar @$rows;
2424             }
2425 0   0     0 $$self->{data_source} //= "";
2426 0         0 $$self->{data_source} .= " transposed";
2427              
2428 0         0 $self->_rows_replaced;
2429 0         0 $self
2430             }#transpose
2431              
2432             # delete_rows rx ...
2433             # delete_rows 'LAST' ...
2434             # delete_rows '$' ...
2435             sub delete_rows(@) {
2436 0     0 1 0 my $self = &__selfmust;
2437 0         0 my (@rowspecs) = @_;
2438              
2439             my ($rows, $linenums, $title_rx, $first_data_rx, $last_data_rx, $current_rx, $verbose)
2440 0         0 = @$$self{qw/rows linenums title_rx first_data_rx last_data_rx current_rx verbose/};
2441              
2442 0         0 foreach (@rowspecs) {
2443 0 0       0 $_ = $#$rows if /^(?:LAST|\$)$/;
2444 0         0 __validate_nonnegi($_, "rx to delete");
2445 0 0       0 croak "Invalid row index '$_'\n" unless $_ <= $#$rows;
2446             }
2447 0         0 my @rev_sorted_rxs = sort {$b <=> $a} @rowspecs;
  0         0  
2448 0 0       0 log_methcall $self, [reverse @rev_sorted_rxs] if $$self->{verbose};
2449              
2450             # Adjust if needed...
2451 0 0       0 if (defined $title_rx) {
2452 0         0 foreach (@rev_sorted_rxs) {
2453 0 0       0 if ($_ < $title_rx) { --$title_rx }
  0 0       0  
2454             elsif ($_ == $title_rx) {
2455             $self->_log("Invalidating titles because rx $title_rx is being deleted\n")
2456 0 0       0 if $$self->{verbose};
2457 0         0 $title_rx = undef;
2458 0         0 last;
2459             }
2460             }
2461 0         0 $$self->{title_rx} = $title_rx;
2462             }
2463 0 0       0 if (defined $first_data_rx) {
2464 0         0 foreach (@rev_sorted_rxs) {
2465 0 0       0 if ($_ <= $first_data_rx) { --$first_data_rx }
  0         0  
2466             }
2467 0         0 $$self->{first_data_rx} = $first_data_rx;
2468             }
2469 0 0       0 if (defined $last_data_rx) {
2470 0         0 foreach (@rev_sorted_rxs) {
2471 0 0       0 if ($_ <= $last_data_rx) { --$last_data_rx }
  0         0  
2472             }
2473 0         0 $$self->{last_data_rx} = $last_data_rx;
2474             }
2475              
2476             # Back up $current_rx to account for deleted rows.
2477             # $current_rx is left set to one less than the index of the "next" row if
2478             # we are in an apply(). That is, current_rx will be left still pointing to
2479             # the same row as before, or if that row has been deleted then the row
2480             # before that (or -1 if row zero was deleted).
2481 0 0       0 if (defined $current_rx) {
2482 0         0 foreach (@rev_sorted_rxs) {
2483 0 0       0 --$current_rx if ($_ <= $current_rx);
2484             }
2485 0         0 $$self->{current_rx} = $current_rx;
2486             }
2487              
2488             #warn "### BEFORE delete_rows rx (@rev_sorted_rxs):\n",
2489             # map( { " [$_]=(".join(",",@{$rows->[$_]}).")\n" } 0..$#$rows);
2490              
2491 0         0 for my $rx (@rev_sorted_rxs) {
2492 0         0 splice @$rows, $rx, 1, ();
2493 0         0 splice @$linenums, $rx, 1, ();
2494             }
2495              
2496             #warn "### AFTER delete_rows:\n",
2497             # map( { " [$_]=(".join(",",@{$rows->[$_]}).")\n" } 0..$#$rows);
2498             $self
2499 0         0 }#delete_rows
2500 0     0 0 0 sub delete_row($) { goto &delete_rows; }
2501              
2502             # $firstrx = insert_rows [rx [,count]]
2503             # $firstrx = insert_rows ['$'[,count]]
2504             sub insert_rows(;$$) {
2505 0     0 1 0 my $self = &__selfmust;
2506 0         0 my ($rx, $count) = @_;
2507 0   0     0 $rx //= 'END';
2508 0   0     0 $count //= 1;
2509              
2510             my ($rows, $linenums, $num_cols, $title_rx, $first_data_rx, $last_data_rx)
2511 0         0 = @$$self{qw/rows linenums num_cols title_rx first_data_rx last_data_rx/};
2512              
2513 0 0       0 $rx = @$rows if $rx =~ /^(?:END|\$)$/;
2514              
2515             log_methcall $self, [\"at rx $rx (count $count)"], [$rx]
2516 0 0       0 if $$self->{verbose};
2517              
2518 0         0 __validate_nonnegi($rx, "new rx");
2519              
2520 0 0 0     0 if (defined($title_rx) && $rx <= $title_rx) {
2521 0         0 $$self->{title_rx} = ($title_rx += $count);
2522             }
2523 0 0 0     0 if (defined($first_data_rx) && $rx <= $first_data_rx) {
2524 0         0 $$self->{first_data_rx} = ($first_data_rx += $count);
2525             }
2526 0 0 0     0 if (defined($last_data_rx) && $rx <= $last_data_rx) {
2527 0         0 $$self->{last_data_rx} = ($last_data_rx += $count);
2528             }
2529              
2530 0         0 for (1..$count) {
2531 0         0 splice @$rows, $rx, 0, [("") x $num_cols];
2532 0         0 splice @$linenums, $rx, 0, "??";
2533             }
2534              
2535 0         0 $rx;
2536             }
2537 0     0 1 0 sub insert_row(;$) { goto &insert_rows; }
2538              
2539             # read_spreadsheet $inpath [Spreadsheet::Edit::IO::OpenAsCSV options...]
2540             # read_spreadsheet $inpath [,iolayers =>... or encoding =>...]
2541             # read_spreadsheet $inpath [,{iolayers =>... or encoding =>... }] #OLD API
2542             # read_spreadsheet [{iolayers =>... or encoding =>... }, ] $inpath #NEW API
2543             #
2544             # Titles are auto-detected by default, but this may be controlled
2545             # via {OPTIONS}:
2546             # title_rx => rx # Don't autodetect; set the specified title_rx
2547             # title_rx => undef # Don't autodetect; no titles
2548             # OR: autodetect options, passed thru to title_rx()
2549             #
2550             sub read_spreadsheet($;@) {
2551 2     2 1 5 my ($self, $opthash, $inpath) = &__self_opthash_1arg;
2552 2         5 my $orig_opthash = { %$opthash };
2553              
2554 2         7 my $saved_stdopts = $self->_set_stdopts($opthash);
2555 2     2   17 scope_guard{ $self->_restore_stdopts($saved_stdopts) };
  2         6  
2556              
2557 2         14 my %csvopts = @sane_CSV_read_options;
2558             # Separate out Text::CSV options from %$opthash
2559 2         22 foreach my $key (Text::CSV::known_attributes()) {
2560             #$csvopts{$key} = delete $opthash{$key} if exists $opthash{$key};
2561 62 50       185 $csvopts{$key} = $opthash->{$key} if defined $opthash->{$key};
2562 62         78 delete $opthash->{$key};
2563             }
2564 2         9 $csvopts{escape_char} = $csvopts{quote_char}; # " : """
2565              
2566             croak "Obsolete {sheet} key in options (use 'sheetname')"
2567 2 50       7 if exists $opthash->{sheet};
2568              
2569 2         23 __validate_opthash( $opthash,
2570             [
2571             qw/title_rx/,
2572             qw/iolayers encoding verbose silent debug/,
2573             qw/tempdir use_gnumeric raw_values sheetname/, # for OpenAsCsv
2574             qw/required min_rx max_rx first_cx last_cx/, # for title_rx
2575             ],
2576             desc => "read_spreadsheet option",
2577             undef_ok => [qw/title_rx verbose silent debug use_gnumeric/] );
2578              
2579             # convert {encoding} to {iolayers}
2580 2 50       10 if (my $enc = delete $opthash->{encoding}) {
2581             #warn "Found OBSOLETE read_spreadsheet 'encoding' opt (use iolayers instead)\n";
2582 0   0     0 $opthash->{iolayers} = ($opthash->{iolayers}//"") . ":encoding($enc)";
2583             }
2584             # Same as last-used, if any
2585             # N.B. If user says nothing, OpenAsCsv() defaults to UTF-8
2586 2   50     31 $opthash->{iolayers} //= $$self->{iolayers} // "";
      33        
2587              
2588             my ($rows, $linenums, $meta_info, $verbose, $debug)
2589 2         7 = @$$self{qw/rows linenums meta_info verbose debug/};
2590              
2591             ##$self->_check_currsheet;
2592              
2593 2         4 my $hash;
2594 2         6 { local $$self->{verbose} = 0;
  2         4  
2595             $hash = OpenAsCsv(
2596             inpath => $inpath,
2597             debug => $$self->{debug},
2598             silent => $$self->{silent},
2599 2   33     24 verbose => ($$self->{verbose} || $$self->{debug}),
2600             %$opthash, # all our opts are valid here
2601             );
2602             }
2603              
2604             ### TODO: Split off the following into a separate read_csvdata() method
2605             ### which takes a file handle? This might be useful so users
2606             ### can open arbitrary sources (even a pipe) and parse the data
2607             ### (e.g. /etc/passwd with : as the separator).
2608             ### ...but unclear how to handle encoding
2609              
2610             # Save possibly-defaulted iolayers for use in subsequent write_csv
2611 2   33     23 $$self->{iolayers} //= $hash->{iolayers};
2612              
2613 2         3 my $fh = $hash->{fh};
2614              
2615 2         5 $csvopts{keep_meta_info} = 1;
2616 2 50       10 my $csv = Text::CSV->new (\%csvopts)
2617             or croak "read_spreadsheet: ".Text::CSV->error_diag ()
2618             .dvis('\n## %csvopts\n');
2619              
2620 2         429 undef $$self->{num_cols};
2621 2         20 @$rows = ();
2622 2         4 @$linenums = ();
2623 2         4 my $lnum = 1;
2624 2         80 while (my $F = $csv->getline( $fh )) {
2625 6         368 push(@$linenums, $lnum);
2626 6         24 my @minfo = $csv->meta_info();
2627             # Force quoting of fields which look like negative numbers with an ascii
2628             # minus (\x{2D}) rather than Unicode math minus (\N{U+2212}).
2629             # This prevents conversion to the Unicode math minus when LibreOffice
2630             # reads the CSV. The assumption is that if the input, when converted
2631             # TO a csv, has an ascii minus then the original spreadsheet cell format
2632             # was "text" not numeric.
2633 6         59 for my $cx (0..$#$F) {
2634             #...TODO $minfo[$cx] |= 0x0001 if $F->[$cx] =~ /^-[\d.]+$/a;
2635             }
2636 6         11 push(@$meta_info, \@minfo);
2637 6         16 $lnum = $.+1;
2638 6         20 push(@$rows, $F);
2639             }
2640 2 50       214 close $fh || croak "Error reading $hash->{csvpath}: $!\n";
2641              
2642             $$self->{data_source} =
2643 2         17 form_spec_with_sheetname($hash->{inpath}, $hash->{sheetname});
2644 2         7 $$self->{sheetname} = $hash->{sheetname}; # possibly undef
2645              
2646 2         7 $self->_rows_replaced;
2647              
2648             # Set title_rx, either to a value explicitly given in OPTIONS (possibly
2649             # undef, meaning no titles) or else auto-detect.
2650 2         4 my %autodetect_opts;
2651 2         5 foreach (qw/required min_rx max_rx first_cx last_cx/) {
2652 10 50       27 $autodetect_opts{$_} = $opthash->{$_} if exists($opthash->{$_});
2653             }
2654            
2655 2 100       8 my $arg = exists($opthash->{title_rx}) ? $opthash->{title_rx} : 'auto';
2656 2         3 { local $$self->{cmd_nesting} = $$self->{cmd_nesting} + 1;
  2         9  
2657 2         7 $autodetect_opts{verbose} = 0; # suppress logging
2658 2         10 $self->title_rx(\%autodetect_opts, $arg);
2659             }
2660              
2661             log_methcall $self, [$orig_opthash, $inpath,
2662             \" [title_rx set to ",vis($$self->{title_rx}),\"]"]
2663 2 50       14 if $$self->{verbose};
2664              
2665 2         25 $self
2666             }#read_spreadsheet
2667              
2668             # write_csv {OPTHASH} "/path/to/output.csv"
2669             # Cells will be quoted if the input was quoted, i.e. if indicated by meta_info.
2670             sub write_csv(*;@) {
2671 0     0 1 0 my $self = &__selfmust;
2672 0 0       0 my $opts = ref($_[0]) eq 'HASH' ? shift() : {};
2673 0         0 my $dest = shift;
2674              
2675 0         0 my %csvopts = ( @sane_CSV_write_options,
2676             quote_space => 0, # dont quote embedded spaces
2677             );
2678             # Separate out Text::CSV options from {OPTIONS}
2679 0         0 foreach my $key (Text::CSV::known_attributes()) {
2680 0 0       0 $csvopts{$key} = $opts->{$key} if defined $opts->{$key};
2681 0         0 delete $opts->{$key};
2682             }
2683              
2684 0         0 { my %notok = %$opts;
  0         0  
2685 0         0 delete $notok{$_} foreach (
2686             #removed above... Text::CSV::known_attributes(),
2687             qw/verbose silent debug/,
2688             );
2689 0 0       0 croak "Unrecognized OPTION(s): ",avislq(keys %notok) if %notok;
2690             }
2691              
2692 0   0     0 $opts->{iolayers} //= $$self->{iolayers} // "";
      0        
2693             # New API: opts->{iolayers} may have all 'binmode' arguments.
2694             # If it does not include encoding(...) then insert default
2695 0 0       0 if ($opts->{iolayers} !~ /encoding\(|:utf8/) {
2696 0   0     0 $opts->{iolayers} .= ":encoding(".
2697             ($self->input_encoding() || DEFAULT_WRITE_ENCODING)
2698             .")";
2699             }
2700 0 0       0 if ($opts->{iolayers} !~ /:(?:crlf|raw)\b/) {
2701             # Use platform default
2702             #$opts->{iolayers} .= ":crlf";
2703             }
2704              
2705             my ($rows, $meta_info, $num_cols, $verbose, $debug)
2706 0         0 = @$$self{qw/rows meta_info num_cols verbose debug/};
2707              
2708 0         0 my $fh;
2709 0 0       0 if (openhandle($dest)) { # an already-open file handle?
2710             log_methcall $self, [$opts, \("<file handle specified> $opts->{iolayers} "
2711             .scalar(@$rows)." rows, $num_cols columns)")]
2712 0 0       0 if $$self->{verbose};
2713 0         0 $fh = $dest;
2714             } else {
2715             log_methcall $self, [$opts, \($dest." $opts->{iolayers} ("
2716             .scalar(@$rows)." rows, $num_cols columns)")]
2717 0 0       0 if $$self->{verbose};
2718 0 0 0     0 croak "Output path suffix must be *.csv, not\n ",qsh($dest),"\n"
2719             if $dest =~ /\.([a-z]*)$/ && lc($1) ne "csv";
2720 0 0       0 open $fh,">$dest" or croak "$dest: $!\n";
2721             }
2722              
2723 0 0       0 binmode $fh, $opts->{iolayers} or die "binmode:$!";
2724              
2725             # Arrgh. Although Text::CSV is huge and complex and implements a complicated
2726             # meta_info mechanism to capture quoting details on input, there is no way to
2727             # use the captured info to specify quoting of output fields!
2728             # So we implement writing CSVs by hand here.
2729             #my $csv = Text::CSV->new (\%csvopts)
2730             # or die "write_csv: ".Text::CSV->error_diag ();
2731             #foreach my $row (@$rows) {
2732             # oops "UNDEF row" unless defined $row; # did user modify @rows?
2733             # $csv->print ($fh, $row);
2734             #};
2735              
2736             # 5/2/22 FIXME: Maybe meta_info could be used when writing, albiet in
2737             # a grotesque way:
2738             # If keep_meta_info is set > 9, then the output quotation style is
2739             # "like it was used in the input of the the last parsed record"; so
2740             # we could "parse" a dummy record to set the quote style before writing
2741             # each record, like this (see perldoc Text::CSV_XS "keep_meta_info"):
2742             # my $csv = Text::CSV_XS->new({ binary=>1, keep_meta_info=>11,
2743             # quote_space => 0 });
2744             # apply_all {
2745             # my $minfo = $meta_info[$rx];
2746             # my @dummy = map{ '', 'x', '""' or '"x'' } @$minfo; # HOW?
2747             # $csv->parse(join ",", @dummy); # set saved meta_info
2748             # $csv->print(*OUTHANDLE, $row);
2749             # }
2750             #
2751              
2752             # Much of the option handling code was copied from Text::CSV_PP.pm
2753             # which depends on default values of options we don't specify explicitly.
2754             # So create a Text::CSV object just to get the effective option values...
2755 0         0 { my $o = Text::CSV->new( \%csvopts );
  0         0  
2756 0         0 foreach my $key (Text::CSV::known_attributes()) {
2757 0         0 $csvopts{$key} = $o->{$key};
2758             }
2759             }
2760              
2761             my $re_esc = ($csvopts{escape_char} ne '' and $csvopts{escape_char} ne "\0")
2762 0 0 0     0 ? ($csvopts{quote_char} ne '') ? qr/(\Q$csvopts{quote_char}\E|\Q$csvopts{escape_char}\E)/ : qr/(\Q$csvopts{escape_char}\E)/
    0          
2763             : qr/(*FAIL)/;
2764 0         0 for my $rx (0..$#$rows) {
2765 0         0 my $row = $rows->[$rx];
2766 0         0 my $minfo = $meta_info->[$rx];
2767 0         0 my @results;
2768 0         0 for my $cx (0..$num_cols-1) {
2769 0         0 my $value = $row->[$cx];
2770 0 0       0 confess "ERROR: rx $rx, cx $cx : undef cell value" unless defined($value);
2771 0         0 my $mi = $minfo->[$cx]; # undef if input was missing columns in this row
2772             my $must_be_quoted = $csvopts{always_quote} ||
2773 0   0     0 (($mi//0) & 0x0001); # was quoted on input
2774 0 0       0 unless ($must_be_quoted) {
2775 0 0       0 if ($value eq '') {
2776 0 0       0 $must_be_quoted = 42 if $csvopts{quote_empty};
2777             } else {
2778 0 0       0 if ($csvopts{quote_char} ne '') {
2779 3     3   40 use bytes;
  3         8  
  3         30  
2780             $must_be_quoted=43 if
2781             ($value =~ /\Q$csvopts{quote_char}\E/) ||
2782             ($csvopts{sep_char} ne '' and $csvopts{sep_char} ne "\0" and $value =~ /\Q$csvopts{sep_char}\E/) ||
2783             ($csvopts{escape_char} ne '' and $csvopts{escape_char} ne "\0" and $value =~ /\Q$csvopts{escape_char}\E/) ||
2784             ($csvopts{quote_binary} && $value =~ /[\x00-\x1f\x7f-\xa0]/) ||
2785 0 0 0     0 ($csvopts{quote_space} && $value =~ /[\x09\x20]/);
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
2786             }
2787             }
2788             }
2789 0         0 $value =~ s/($re_esc)/$csvopts{escape_char}$1/g;
2790 0 0       0 if ($csvopts{escape_null}) {
2791 0         0 $value =~ s/\0/$csvopts{escape_char}0/g;
2792             }
2793 0 0       0 if ($must_be_quoted) {
2794 0         0 $value = $csvopts{quote_char} . $value . $csvopts{quote_char};
2795             }
2796 0 0       0 $fh->print($csvopts{sep_char}) unless $cx==0;
2797 0         0 $fh->print($value);
2798             }
2799 0         0 $fh->print($csvopts{eol});
2800             }
2801              
2802 0 0       0 if (! openhandle $dest) {
2803 0 0       0 close $fh || croak "Error writing $dest : $!\n";
2804             }
2805             $self
2806 0         0 }#write_csv
2807              
2808             # Write spreadsheet with specified column formats
2809             # {col_formats} is required
2810             # Unless {sheetname} is specified, the sheet name is the outpath basename
2811             # sans any suffix
2812             sub write_spreadsheet(*;@) {
2813 0     0 1 0 my ($self, $opts, $outpath) = &__self_opthash_1arg;
2814 0         0 my $colx = $$self->{colx};
2815              
2816 0 0       0 log_methcall $self, [$opts, $outpath] if $$self->{verbose};
2817              
2818             # {col_formats} may be [list of formats in column order]
2819             # or { COLSPEC => fmt, ..., __DEFAULT__ => fmt }
2820             # Transform the latter to the former...
2821 0   0     0 my $cf = $opts->{col_formats} // croak "{col_formats} is required";
2822 0 0       0 if (ref($cf) eq "HASH") {
2823 0         0 my ($default, @ary);
2824 0         0 while (my ($key, $fmt) = each %$cf) {
2825 0 0       0 ($default = $fmt),next if $key eq "__DEFAULT__";
2826 0   0     0 my $cx = $colx->{$key} // croak("Invalid COLSPEC '$key' in col_formats");
2827 0         0 $ary[$cx] = $fmt;
2828             }
2829 0 0       0 foreach (@ary) { $_ = $default if ! defined; }
  0         0  
2830 0         0 $cf = \@ary;
2831             }
2832 0         0 local $opts->{col_formats} = $cf;
2833              
2834 0         0 my ($csvfh, $csvpath) = tempfile(SUFFIX => ".csv");
2835 0         0 { local $$self->{verbose} = 0;
  0         0  
2836 0         0 $self->write_csv($csvfh, silent => 1, iolayers => ':encoding(UTF-8)',
2837             @sane_CSV_write_options);
2838             }
2839 0 0       0 close $csvfh or die "Error writing $csvpath : $!";
2840              
2841             # Default sheet name to output file basename sans suffix
2842 0   0     0 $opts->{sheetname} //= fileparse($outpath, qr/\.\w+/);
2843              
2844 0         0 convert_spreadsheet($csvpath,
2845             %$opts,
2846             iolayers => ':encoding(UTF-8)',
2847             cvt_from => "csv",
2848             outpath => $outpath,
2849             );
2850 0         0 $self
2851             }
2852              
2853             #====================================================================
2854             # These helpers are used by predefined magic sheet variables.
2855             # See code in Spreadsheet::Edit::import()
2856              
2857             # Return $self if during an apply, or if being examined by Data::Dumper ;
2858             # otherwise croak
2859             sub _onlyinapply {
2860 7     7   20 my ($self, $accessor) = @_;
2861 7 50       33 unless (defined $$self->{current_rx}) {
2862 0         0 for (my $lvl=2; ;$lvl++) {
2863 0   0     0 my $pkg = (caller($lvl))[0] || last;
2864 0 0 0     0 return $self
2865             if defined($pkg) && $pkg->isa("Data::Dumper") # perldoc UNIVERSAL
2866             }
2867 0         0 croak "Can't use $accessor now: Not during apply*\n"
2868             }
2869             $self
2870 7         21 }
2871             sub __getsheet($$$$) {
2872 9     9   51 my ($mutating, $pkg, $uvar, $onlyinapply) = @_;
2873 9         33 my $sheet = $pkg2currsheet{$pkg};
2874 9 50       46 croak("Modifying variable $uvar is not allowed\n")
2875             if $mutating;
2876 9 50       44 croak("Can not use $uvar: No sheet is currently valid for package $pkg\n")
2877             unless defined $sheet;
2878 9 100       52 $onlyinapply ? _onlyinapply($sheet, $uvar) : $sheet
2879             }
2880             sub _scal_tiehelper { # access a scalar sheet variable
2881 5     5   618 my($mutating, $pkg, $uvar, $ident, $onlyinapply) = @_;
2882 5         20 my $sheet = __getsheet($mutating, $pkg, $uvar, $onlyinapply);
2883 5 50       20 confess avisq(@_) unless exists $$sheet->{$ident};
2884 5         42 return \$$sheet->{$ident}; # return ref to the scalar
2885             }
2886             sub _aryelem_tiehelper { # access an element of an array sheet variable
2887 0     0   0 my($mutating, $pkg, $uvar, $index_ident, $array_ident, $onlyinapply) = @_;
2888             # E.g. for $title_row : index_ident="title_rx" and array_ident="rows"
2889 0         0 my $sheet = __getsheet($mutating, $pkg, $uvar, $onlyinapply);
2890 0   0     0 my $aref = $$sheet->{$array_ident} // oops dvisq '$array_ident @_'; # e.g. {rows}
2891 0   0     0 my $index = $$sheet->{$index_ident} // do{
2892 0 0 0     0 if ($index_ident eq "current_rx" or $index_ident eq "title_rx") {
2893             return \undef # During Data::Dumper inspection of current_row?
2894 0         0 }
2895 0         0 oops dvis '$array_ident $index_ident'; # otherwise it's a bug
2896             };
2897 0 0       0 oops(dvisq '@_ $index') if $index > $#$aref;
2898 0         0 return \$aref->[$index]; # return ref to scalar (the element in the array)
2899             }
2900             sub _refval_tiehelper { # access a sheet variable which is a ref of some kind
2901 4     4   1671240 my($mutating, $pkg, $uvar, $field_ident, $onlyinapply, $mutable) = @_;
2902 4 50       51 $mutating = 0 if $mutable;
2903 4         35 my $sheet = __getsheet($mutating, $pkg, $uvar, $onlyinapply);
2904 4         173 return $$sheet->{$field_ident}; # return the value, which is itself a ref
2905             }
2906              
2907             # Retrieve the sheet currently accessed by the functional API & tied globals
2908             # in the caller's package (or the specified package).
2909             # If an argument is passed, change the sheet to the specified sheet.
2910             #
2911             # Always returns the previous sheet (or undef)
2912             sub sheet(;$$) {
2913 14     14 1 536 my $opthash = &__opthash;
2914 14   66     77 my $pkg = $opthash->{package} // caller();
2915 14 50       48 oops if index($pkg,__PACKAGE__) >= 0; # not us or sub-pkg
2916 14 100       57 my $pkgmsg = $opthash->{package} ? " [for pkg $pkg]" : "";
2917 14         26 my $curr = $pkg2currsheet{$pkg};
2918 14   33     77 my $verbose = $opthash->{verbose} || ($curr && $$curr->{verbose});
2919 14 100       34 if (@_) {
2920 12         101 my $new = __validate_sheet_arg(shift @_);
2921 12 50       33 croak "Extraneous argument(s) in call to sheet()" if @_;
2922 12 100       37 if (defined $new) {
2923 9 50       34 oops if $$new->{cmd_nesting};
2924 9   33     63 $verbose ||= $$new->{verbose};
2925             }
2926              
2927 12 0       26 log_call [$opthash, \(" ".fmt_sheet($new)),
    50          
2928             \(u($curr) eq u($new)
2929             ? " [no change]"
2930             : " [previous: ".fmt_sheet($curr)."]"),
2931             \$pkgmsg]
2932             if $verbose;
2933              
2934 12         30 $pkg2currsheet{$pkg} = $new;
2935             } else {
2936 2 50       15 log_call [$opthash], [\fmt_sheet($curr), \$pkgmsg]
2937             if $verbose;
2938             }
2939 14         118 $curr
2940             }
2941              
2942             #====================================================================
2943             package
2944             Spreadsheet::Edit::RowsTie; # implements @rows and @$sheet
2945 3     3   6760 use parent 'Tie::Array';
  3         8  
  3         19  
2946              
2947 3     3   2167 use Carp;
  3         9  
  3         303  
2948             #our @CARP_NOT = qw(Tie::Indirect Tie::Indirect::Array
2949             # Tie::Indirect::Hash Tie::Indirect::Scalar);
2950 3         26 use Data::Dumper::Interp 6.004 qw/visnew
2951             vis viso avis alvis ivis dvis hvis hlvis
2952             visq visoq avisq alvisq ivisq dvisq hvisq hlvisq
2953 3     3   24 addrvis rvis rvisq u quotekey qsh qshlist qshpath/;
  3         55  
2954              
2955 3     3   4701 use Scalar::Util qw(looks_like_number weaken);
  3         16  
  3         2265  
2956 0     0   0 sub oops(@) { goto &Spreadsheet::Edit::oops }
2957              
2958             sub TIEARRAY {
2959 4     4   13 my ($classname, $sheet) = @_;
2960 4         12 my $o = bless [ [], $sheet], $classname;
2961 4         31 weaken $o->[1];
2962 4         11 $o
2963             }
2964             sub FETCH {
2965 47     47   87 my ($this, $index) = @_;
2966 47         88 my $aref = $this->[0];
2967 47 50 33     166 croak "Index ",u($index)," is invalid or out of range"
2968             unless $index >= 0 && $index <= $#$aref;
2969 47         198 $aref->[$index];
2970             }
2971             sub STORE {
2972 12     12   60 my ($this, $index, $val) = @_;
2973 12         33 my ($aref, $sheet) = @$this;
2974 12 50 33     67 croak "Index ",u($index)," is invalid or out of range"
2975             unless $index >= 0 && $index <= $#$aref+1;
2976 12 50       29 croak "Value must be a ref to array of cell values (not $val)"
2977             if ! Spreadsheet::Edit::__looks_like_aref($val);
2978             croak "Cell values may not be undef"
2979 12 50       29 if grep{! defined} @$val;
  30         74  
2980             croak "Cell values must be strings or numbers"
2981 12 50       29 if grep{ ref($_) && !looks_like_number($_) } @$val;
  30 50       79  
2982 12 50       98 if (my $num_cols = $$sheet->{num_cols}) {
2983 0 0       0 croak "New row must contain $num_cols cells (not ", $#$val+1, ")"
2984             if @$val != $num_cols;
2985             }
2986             # else (0 or undef) someone promises to set it later
2987              
2988             # Store a *copy* of the data to dispose of a Magicrow wrapper, if present
2989 12         48 my $cells = [ @$val ];
2990 12         56 $aref->[$index] = Spreadsheet::Edit::Magicrow->new($sheet, $cells);
2991             }
2992 245     245   437 sub FETCHSIZE { scalar @{ $_[0]->[0] } }
  245         974  
2993             sub STORESIZE {
2994 2     2   9 my ($this, $newlen) = @_;
2995 2         4 $#{ $this->[0] } = $newlen-1;
  2         10  
2996             }
2997             # End packageSpreadsheet::Edit::RowsTie
2998              
2999             #====================================================================
3000             package
3001             Spreadsheet::Edit::Magicrow;
3002              
3003 3     3   29 use Carp;
  3         16  
  3         421  
3004             our @CARP_NOT = qw(Spreadsheet::Edit);
3005 3     3   24 use Scalar::Util qw(weaken blessed looks_like_number);
  3         6  
  3         363  
3006 0     0   0 sub oops(@) { goto &Spreadsheet::Edit::oops }
3007 3     3   26 use Data::Dumper::Interp;
  3         6  
  3         21  
3008              
3009             sub new {
3010 12     12   28 my ($classname, $sheet, $cells) = @_;
3011 12         21 my %hashview; tie %hashview, __PACKAGE__, $cells, $sheet;
  12         40  
3012 12         66 bless \ [$cells, \%hashview], $classname;
3013             }
3014 110     110   167 use overload '@{}' => sub { ${ shift() }->[0] },
  110         335  
3015 4     4   21 '%{}' => sub { ${ shift() }->[1] },
  4         196  
3016             #'""' => sub { shift }, # defeats vis overload eval!
3017             #'0+' => sub { shift },
3018             #'==' => sub { my ($self, $other, $swap) = @_; $self == $other },
3019             #'eq' => sub { my ($self, $other, $swap) = @_; "$self" eq "$other" },
3020 3         26 fallback => 1, # for "" etc. FIXME: is this really ok?
3021 3     3   1850 ;
  3         9  
3022              
3023             sub TIEHASH {
3024 12     12   25 my ($pkg, $cells, $sheet) = @_;
3025 12         45 my $o = bless \ [$cells, $sheet], $pkg;
3026 12         107 weaken $$o->[1];
3027 12         29 $o
3028             }
3029             sub _cellref {
3030 4     4   8 my ($cells, $sheet) = @{ ${ shift() } }; # First arg is 'self'
  4         14  
  4         26  
3031 4         10 my $key = shift; # Second arg is key
3032 4         9 my $mutating = @_; # Third arg exists only for STORE
3033 4         11 my $colx = $$sheet->{colx};
3034 4         9 my $cx = $colx->{$key};
3035              
3036 4 50       21 if (! defined $cx) {
3037 0 0       0 exists($colx->{$key})
3038             or croak "'$key' is an unknown COLSPEC. The valid keys are:\n",
3039             $sheet->_fmt_colx();
3040             # Undef colx results from alias({optional => TRUE},...) which failed,
3041             # or from an alias which became invalid because the column was deleted.
3042 0 0       0 croak "Attempt to write to 'optional' alias '$key' which is currently NOT DEFINED"
3043             if $mutating;
3044             return \undef # Reading such a column returns undef
3045 0         0 }
3046 4   33     8 $cx <= $#{$cells}
  4         28  
3047             // croak "BUG?? key '$key' maps to cx $cx which is out of range!";
3048 4         48 \$cells->[$cx]
3049             }
3050             sub FETCH {
3051 4     4   22 ${ &_cellref }
  4         9  
3052             }
3053             sub STORE {
3054 0     0     my $r = &_cellref;
3055 0           $$r = shift;
3056             }
3057             sub NEXTKEY {
3058 0     0     my (undef, $sheet) = @{ ${ shift() } };
  0            
  0            
3059 0           each %{ $$sheet->{colx} }
  0            
3060             }
3061             sub FIRSTKEY {
3062 0     0     my (undef, $sheet) = @{ ${ shift() } };
  0            
  0            
3063 0           my $colx = $$sheet->{colx};
3064 0           my $a = scalar keys %$colx; # reset iterator
3065 0           each %$colx;
3066             }
3067             sub EXISTS {
3068 0     0     my (undef, $sheet) = @{ ${ shift() } };
  0            
  0            
3069 0           my $key = shift;
3070 0           exists $$sheet->{colx}->{$key}
3071             }
3072             sub SCALAR {
3073 0     0     my (undef, $sheet) = @{ ${ shift() } };
  0            
  0            
3074 0           scalar %{ $$sheet->{colx} }
  0            
3075             }
3076 0     0     sub DELETE { confess "DELETE not allowed for ".__PACKAGE__ }
3077 0     0     sub CLEAR { confess "CLEAR not allowed for ".__PACKAGE__ }
3078              
3079             # End package Spreadsheet::Edit::Magicrow;
3080             #====================================================================
3081              
3082             1;
3083             __END__
3084              
3085             =pod
3086              
3087             =encoding UTF-8
3088              
3089             =head1 NAME
3090              
3091             Spreadsheet::Edit - Slice and dice spreadsheets, optionally using tied variables.
3092              
3093             =head1 NON-OO SYNOPSIS
3094              
3095             use Spreadsheet::Edit qw(:all);
3096              
3097             # Examples assume a spreadsheet with these titles in the first row:
3098             # "Account Number" "Customer's Name" "Email" "Home-phone" "Income"
3099              
3100             read_spreadsheet "mailing_list.xls!Sheet1";
3101              
3102             # alias an identifier to a long or complicated title
3103             alias Name => qr/customer/i; # matches "Customer's Name"
3104              
3105             # ------------ without tied column variables -----------
3106              
3107             # Print the data
3108             printf "%20s %8s %8s %-13s %s\n", "Name","A/N","Income","Phone","Email";
3109             apply {
3110             printf "%20s %8d %8.2f %-13s %s\n",
3111             $crow{Name}, # this key is an explicit alias
3112             $crow{"Account Number"}, # ...actual title
3113             $crow{Income}, # ...actual title
3114             $crow{Home_phone}, # ...auto-generated alias
3115             $crow{Email} ; # ...actual title
3116             };
3117              
3118             # Randomly access rows.
3119             print "Row 42: Column 'C' is ", $rows[41]{C}, "\n";
3120             print "Row 42: Customer's Name is ", $rows[41]{Name}, "\n";
3121             print "Row 42: 3rd column is ", $rows[41][2], "\n";
3122              
3123             # Split the "Customer's Name" into separate FName and LName columns
3124             insert_cols '>Name', "FName", "LName";
3125             apply {
3126             ($crow{FName}, $crow{LName}) = ($crow{Name} =~ /(.*) (.*)/)
3127             or die logmsg "Could not parse Name"; # logmsg adds current row number
3128             };
3129             delete_cols "Name";
3130              
3131             # Sort by last name
3132             sort_rows { $a->{LName} cmp $b->{LName} };
3133              
3134             # ------------ using tied column variables -----------
3135              
3136             our $Name; # 'Name' is the explicit alias created above
3137             our $Account_Number; # Auto-generated alias for "Account Number"
3138             our $Home_phone; # ditto
3139             our $Income; # 'Income' is an actual title
3140             our $Email; # ditto
3141             our ($FName, $LName); # These columns do not yet exist
3142              
3143             tie_column_vars "Name", "Account_Number", qr/phone/, qr/^inc/i, "FName", "LName";
3144              
3145             # Print the data
3146             printf "%20s %8s %8s %-13s %s\n", "Name","A/N","Income","Phone","Email";
3147             apply {
3148             printf "%20s %8d %8.2f %-13s%s\n",
3149             $Name, $Account_Number, $Income, $Home_phone, $Email;
3150             };
3151              
3152             # Split the "Customer's Name" into separate FName and LName columns
3153             insert_cols '>Name', "FName", "LName";
3154             apply {
3155             ($FName, $LName) = ($Name =~ /^(\S+) (\S+)$/)
3156             or die logmsg "Could not parse Name";
3157             };
3158             delete_cols "Name";
3159              
3160             # Simple mail-merge
3161             use POSIX qw(strftime);
3162             apply {
3163             return
3164             if $Income < 100000; # not in our audience
3165             open SENDMAIL, "|sendmail -t -oi" || die "pipe:$!";
3166             print SENDMAIL "To: $FName $LName <$Email>\n";
3167             print SENDMAIL strftime("Date: %a, %d %b %y %T %z\n", localtime(time));
3168             print SENDMAIL <<EOF ;
3169             From: sales\@example.com
3170             Subject: Help for the 1%
3171              
3172             Dear $FName,
3173             If you have disposable income, we can help with that.
3174             Sincerely,
3175             Your investment advisor.
3176             EOF
3177             close SENDMAIL || die "sendmail failed ($?)\n";
3178             };
3179              
3180             # ------------ multiple sheets --------------
3181              
3182             our ($Foo, $Bar, $Income);
3183              
3184             read_spreadsheet "file1.csv";
3185             tie_column_vars; # tie all vars that ever become valid
3186              
3187             my $s1 = sheet undef ; # Save ref to current sheet & forget it
3188              
3189             read_spreadsheet "file2.csv"; # Auto-creates sheet bc current is undef
3190             tie_column_vars;
3191              
3192             my $s2 = sheet ; # Save ref to second sheet
3193              
3194             print "$Foo $Bar $Income\n"; # these refer to $s2, the current sheet
3195              
3196             sheet $s1 ;
3197             print "$FName $LName $Income\n"; # these now refer to the original sheet
3198              
3199             # ------------ create sheet from memory --------------
3200              
3201             my $s3 = new_sheet
3202             data_source => "my own data",
3203             rows => [
3204             ["This is a row before the title row" ],
3205             ["Full Name", "Address", "City", "State", "Zip" ],
3206             ["Joe Smith", "123 Main St", "Boston", "CA", "12345"],
3207             ["Mary Jones", "999 Olive Drive", "Fenton", "OH", "67890"],
3208             ],
3209             ;
3210             $s3=>title_rx(1);
3211             ...
3212              
3213             =head1 OO SYNOPSIS
3214              
3215             use Spreadsheet::Edit ();
3216              
3217             my $sheet = Spreadsheet::Edit->new();
3218             $sheet->read_spreadsheet("mailing_list.xls!sheet name");
3219             $sheet->alias( Name => qr/customer/i ); # matches "Customer's Name"
3220              
3221             # Randomly access rows.
3222             # Sheet objects, when used as an ARRAYref, act like \@rows
3223             print "Row 42: Name is ", $sheet->[41]{Name}, "\n";
3224             print "Row 42, Column 3 is ", $sheet->[41][2], "\n";
3225              
3226             # Print the data.
3227             # Sheet objects, when used as an HASHref, act like \%crow
3228             printf "%20s %8s %8s %-13s %s\n", "Name","A/N","Income","Phone","Email";
3229             $sheet->apply( sub{
3230             printf "%20s %8d %8.2f %-13s%s\n",
3231             $sheet->{Name},
3232             $sheet->{"Account Number"},
3233             $sheet->{Income},
3234             $sheet->{Home_phone},
3235             $sheet->{Email} ;
3236             });
3237              
3238             # Another way:
3239             $sheet->apply( sub{
3240             my $r = $sheet->crow();
3241             printf "%20s %8d %8.2f %-13s%s\n",
3242             $r->{Name}, $r->{"Account Number"}, $r->{Income},
3243             $r->{Home_phone}, $r->{Email} ;
3244             });
3245              
3246             # Another way:
3247             $sheet->apply( sub{
3248             my $r = $sheet->crow();
3249             printf "%20s %8d %8.2f %-13s%s\n",
3250             $r->[0], $r->[1], $r->[4], $r->[3], $r->[2] ;
3251             });
3252              
3253             # Split the "Customer's Name" into separate FName and LName columns
3254             $sheet->insert_cols('>Name', "FName", "LName");
3255             $sheet->apply( sub {
3256             my $r = $sheet->crow();
3257             ($r->{FName}, $r->{LName}) = ($r->{Name} =~ /(.*) (.*)/)
3258             or die Spreadsheet::Edit::logmsg("Could not parse Name");
3259             });
3260             $sheet->delete_cols( "Name" );
3261              
3262              
3263             =head1 INTRODUCTION
3264              
3265             =over
3266              
3267              
3268             Skip ahead to "FUNCTIONS" for a list of operations (OO methods are
3269             named the same).
3270              
3271             =back
3272              
3273             Columns may be referenced by title without knowing their absolute positions.
3274             Optionally global (package) variables may be tied to columns.
3275             Tabular data can come
3276             from Spreadsheets, CSV files, or your code, and be written similarly.
3277             Both Functional and Object-Oriented (OO) APIs are provided.
3278              
3279             A table in memory is stored in a C<sheet> object, which contains
3280             an array of rows, each of which is an array of cell values.
3281              
3282             Cells within a row may be accessed by name (e.g. column titles) or
3283             by absolute position (letter code like "A", "B" etc. or zero-based index).
3284              
3285             The usual paradigm is to iterate over rows applying a function
3286             to each, vaguely inspired by 'sed' and 'awk' (see C<apply> below).
3287             Random access is also supported.
3288              
3289             Tied variables can be used with the functional API. These variables
3290             refer to columns in the current row during a C<apply> operation.
3291              
3292             Note: Only cell I<values> are handled; there is no provision
3293             for processing formatting information from spreadsheets.
3294             The author has a notion to add support for formats,
3295             perhaps integrating with Spreadsheet::Read and Spreadsheet::Write
3296             or the packages they use. Please contact the author if you want to help.
3297              
3298             =head3 HOW TO IMPORT
3299              
3300             By default only functions are imported, but most people will
3301              
3302             use Spreadsheet::Edit ':all';
3303              
3304             to import functions and helper variables (see STANDARD SHEET VARIABLES
3305             and VARIABLES USED DURING APPLY).
3306              
3307             You can rename imported items using the '-as' notation shown in
3308             L<Exporter::Tiny::Manual::QuickStart>.
3309              
3310             Purely-OO applications can L<use Spreadsheet::Edit ();>.
3311              
3312             =head1 THE 'CURRENT SHEET'
3313              
3314             The I<Functions> and helper variables implicitly operate on a
3315             package-global "current sheet" object, which can be switched at will.
3316             OO I<Methods> operate on the C<sheet> object they are called on.
3317              
3318             Functions which operates on the "current sheet" have
3319             corresponding OO methods with the same names and arguments
3320             (note that method args must be enclosed by parenthesis).
3321              
3322             =head1 TIED COLUMN VARIABLES
3323              
3324             Package variables can refer directly to columns in the 'current sheet'
3325             during C<apply>. For example C<$Email> and C<$FName> in
3326             the SYNOPSIS above.
3327              
3328             See C<tie_column_vars> for details.
3329              
3330             =head1 THE FUNCTIONS
3331              
3332             In the following, {OPTIONS} refers to an optional first argument
3333             which, if present, is a hashref giving additional parameters.
3334             For example in
3335              
3336             read_spreadsheet {sheetname => 'Sheet1'}, '/path/to/file.xlsx';
3337              
3338             the {...} hash is optional and specifies the sheet name.
3339              
3340             =head2 $curr_sheet = sheet ;
3341              
3342             =head2 $prev_sheet = sheet $another_sheet ;
3343              
3344             =head2 $prev_sheet = sheet undef ;
3345              
3346             [Functional API only]
3347             Retrieve, change, or forget the 'current sheet' object used
3348             by the functional API.
3349              
3350             Changing the current sheet immediately changes what is referenced by
3351             tied column variables and STANDARD SHEET VARIABLES (described later).
3352              
3353             {OPTIONS} may specify C<< package => 'pkgname' >> to operate on the specified
3354             package instead of the caller's package (useful in library code).
3355              
3356             =head2 read_spreadsheet CSVFILEPATH
3357              
3358             =head2 read_spreadsheet SPREADSHEETPATH
3359              
3360             =head2 read_spreadsheet "SPREADSHEETPATH!SHEETNAME"
3361              
3362             Replace any existing data with content from the given file.
3363              
3364             The Functional API will create a new sheet object if
3365             there is no "current sheet".
3366              
3367             The file may be a .csv or any format supported by Libre Office or gnumeric.
3368              
3369             By default column titles are auto-detected and
3370             an exception is thrown if a plausible title row can not be found.
3371              
3372             {OPTIONS} may include:
3373              
3374             Auto-detection options:
3375              
3376             =over 2
3377              
3378             required => COLSPEC or [COLSPEC,...] # any required title(s)
3379             min_rx => NUM, # first rx which may contain the title row.
3380             max_rx => NUM, # maximum rx which may contain the title row.
3381             first_cx => NUM, # first column ix which must contain required titles
3382             last_cx => NUM, # last column ix which must contain required titles
3383              
3384             =back
3385              
3386             The first row is used which includes the C<required> title(s), if any,
3387             and has non-empty titles in all columns, or columns
3388             C<first_cx> through C<last_cx>.
3389              
3390             =over 2
3391              
3392             title_rx => rx # specify title row (first row is 0)
3393             title_rx => undef # no title row
3394              
3395             =back
3396              
3397             These disable auto-detection and explicitly specify the title row
3398             index, or with C<undef>, that there are no titles.
3399             See also the C<title_rx> function/method.
3400              
3401             Other options:
3402              
3403             =over 6
3404              
3405             =item sheetname => SHEETNAME
3406              
3407             Specify which sheet in a multi-sheet workbook (i.e. spreadsheet file) to read.
3408             Alternatively, the sheet name may be appended to the input
3409             path after '!' as shown in the example.
3410              
3411             If no SHEETNAME is given then the sheet which was "active" when the
3412             workbook was saved will be retrieved.
3413              
3414             =item silent => bool
3415              
3416             =item verbose => bool
3417              
3418             =item debug => bool
3419              
3420             Probably what you expect.
3421              
3422             =item Other C<< key => value >> pairs override details of CSV parsing.
3423              
3424             See L<Text::CSV>. UTF-8 encoding is assumed by default.
3425              
3426             =back
3427              
3428             Due to bugs in Libre/Open Office, spreadsheet files can not
3429             be read if LO/OO is currently running, even
3430             for unrelated purposes (see "BUGS").
3431             This problem does not occur with .csv files
3432              
3433             =head2 alias IDENT => COLSPEC, ... ;
3434              
3435             =head2 alias IDENT => qr/regexp/, ... ;
3436              
3437             Create alternate identifiers for specified columns.
3438              
3439             Each IDENT, which must be a valid Perl identifier, will henceforth
3440             refer to the specified column even if the identifier is the same
3441             as the title or letter code of a different column.
3442              
3443             C<$row{IDENT}> and a tied variable C<$IDENT> will refer to the specified column.
3444              
3445             Aliases automatically track the column if it's position changes.
3446              
3447             Regular Expressions are matched against titles only, and an exception is
3448             thrown if more than one title matches.
3449              
3450             Otherwise, COLSPECs may be titles, existing alias names, column letters, etc.
3451             (see "COLUMN SPECIFIERS" for details).
3452              
3453             The COLSPEC is evaluated before the alias is created, so
3454              
3455             alias B => "B";
3456              
3457             would make "B" henceforth refer to the column with title "B", if one exists,
3458             or the second column (treating COLSPEC as a letter-code), even if the column
3459             is later moved.
3460              
3461             RETURNS: The 0-based column indices of the aliased column(s).
3462              
3463             =head2 unalias IDENT, ... ;
3464              
3465             Forget alias(es). Any masked COLSPECs become usable again.
3466              
3467             =head2 tie_column_vars VARNAME, ...
3468              
3469             Create tied package variables (scalars) for use during C<apply>.
3470              
3471             Each variable is a scalar corresponding to a column, and reading or writing
3472             it accesses the corresponding cell in the row being visited during C<apply>.
3473              
3474             The '$' may be omitted in the VARNAME arguments to C<tie_column_vars>;
3475              
3476             You must separately declare these variables with C<our $NAME>,
3477             except in the special case described
3478             in "Use in BEGIN() or module import methods" later.
3479              
3480             The variable name itself implies the column it refers to.
3481              
3482             Variable names may be:
3483              
3484             =over
3485              
3486             =item * User-defined alias names (see "alias")
3487              
3488             =item * Titles which happen to be valid Perl identifiers
3489              
3490             =item * Identifiers derived from titles by replacing offending characters
3491             with underscrores (see "AUTOMATIC ALIASES"),
3492              
3493             =item * Spreadsheet column letters like "A", "B" etc.
3494              
3495             =back
3496              
3497             See "CONFLICT RESOLUTION" for how ambiguity is resolved.
3498              
3499             Multiple calls accumulate, including with different sheets.
3500              
3501             Variable bindings are dynamically evaluated during each access by using the
3502             variable's identifier as a COLSPEC with the 'current sheet' in your package.
3503             This means that it does not matter which sheet
3504             was 'current' when C<tie_column_vars> was called with a particular name;
3505             it only matters that the name of a tied variable is a valid COLSPEC in
3506             the 'current sheet' when that variable is referenced
3507             (otherwise a read returns I<undef> and a write throws an exception).
3508             [*Need clarification*]
3509              
3510             B<{OPTIONS}> may specify:
3511              
3512             =over
3513              
3514             =item package => "pkgname"
3515              
3516             Tie variables in the specified package instead of the caller's package.
3517              
3518             =item verbose => bool
3519              
3520             =item debug => bool
3521              
3522             Print trace messages.
3523              
3524             =back
3525              
3526             =head2 tie_column_vars ':all'
3527              
3528             With the B<:all> token I<all possible variables> are tied, corresponding
3529             to the aliases, titles, non-conflicting column letters etc. which exist
3530             for the current sheet.
3531              
3532             In addition, variables will be tied in the future I<whenever new identifiers
3533             become valid> (for example when a new C<alias> is created, column added,
3534             or another file is read into the same sheet).
3535              
3536             Although convenient this is B<insecure> because malicious
3537             titles could clobber unintended globals.
3538              
3539             If VARNAMES are also specified, those variables will be tied
3540             immediately even if not yet usable; an exception occurs if a tied variable
3541             is referenced before the corresponding alias or title exists.
3542             [*Need clarification* -- exception even for reads??]
3543              
3544             =head2 Use in BEGIN{} or module import methods
3545              
3546             C<tie_column_vars> B<imports> the tied variables into your module,
3547             or the module specified with package => "pkgname" in {OPTIONS}.
3548              
3549             It is unnecessary to declare tied variables if the import
3550             occurs before code is compiled which references the variables. This can
3551             be the case if C<tie_column_vars> is called in a BEGIN{} block or in the
3552             C<import> method of a module loaded with C<use>.
3553              
3554             L<Spreadsheet::Edit::Preload> makes use of this.
3555              
3556             =head2 $rowindex = title_rx ;
3557              
3558             Retrieve the current title row rx, or undef if there are no titles
3559              
3560             =head2 title_rx ROWINDEX ;
3561              
3562             Make the specified row be the title row. Titles in that row are
3563             immediately (re-)examined and the corresponding COLSPECs become valid,
3564             e.g. you can reference a column by it's title or a derived identifier.
3565              
3566             Note: Setting C<title_rx> this way is rarely needed because
3567             by default C<read_spreadsheet> sets the title row.
3568              
3569             =head2 title_rx undef ;
3570              
3571             Titles are disabled and any existing COLSPECs derived from
3572             titles are invalidated.
3573              
3574             =head2 title_rx {AUTODETECT_OPTIONS} 'auto';
3575              
3576             Immediately perform auto-detection of the title row using
3577             C<< {AUTODETECT_OPTIONS} >> to modify any existing auto-detect options
3578             (see C<read_spreadsheet> for a description of the options).
3579              
3580              
3581             =head2 apply {code} [COLSPEC*] ;
3582              
3583             =head2 apply_all {code} [COLSPEC*] ;
3584              
3585             =head2 apply_torx {code} RX-OR-RXLIST [,COLSPEC*] ;
3586              
3587             =head2 apply_exceptrx {code} RX-OR-RXLIST [,COLSPEC*] ;
3588              
3589             Execute the specified code block (or referenced sub) once for each row.
3590              
3591             Note that there is no comma after a bare {code} block.
3592              
3593             While executing your code, tied column variables and
3594             the sheet variables C<@crow>, C<%crow>, C<$rx> and C<$linenum>
3595             and corresponding OO methods will refer to the row being visited.
3596              
3597             If a list of COLSPECs is specified, then
3598              
3599             =over 2
3600              
3601             @_ is bound to the columns in the order specified
3602             $_ is bound to the first such column
3603              
3604             =back
3605              
3606             C<apply> normally visits all rows which follow the title row, or all rows
3607             if there is no title row.
3608             C<first_data_rx> and C<last_data_rx>, if defined, further limit the
3609             range visited.
3610              
3611             C<apply_all> unconditionally visits every row, including any title row.
3612              
3613             C<apply_torx> or C<apply_exceptrx> visit exactly the indicated rows.
3614             RX-OR-RXLIST may be either a single row index or a [list of rx];
3615              
3616             Rows may be safely inserted or deleted during 'apply';
3617             rows inserted after the currently-being-visited row will be visited
3618             at the proper time.
3619              
3620             Nested and recursive C<apply>s are allowed.
3621             When an 'apply' changes the 'current sheet',
3622             tied variables then refer to the other sheet and
3623             any C<apply> active for that sheet.
3624             With nested 'apply's, take care restore the original sheet before returning
3625             (perhaps using C<Guard::scope_guard>).
3626              
3627             B<MAGIC VARIABLES USED DURING APPLY>
3628              
3629             These variables refer to the row currently being visited:
3630              
3631             =over 2
3632              
3633             B<@crow> is an array aliased to the current row's cells.
3634              
3635             B<%crow> is a hash aliased to the same cells,
3636             indexed by alias, title, letter code, etc. (any COLSPEC).
3637              
3638             B<$rx> is the 0-based index of the current row.
3639              
3640             B<$linenum> is the starting line number of the current row if the
3641             data came from a .csv file.
3642              
3643             For example, the "Account Number" column in the SYNOPSIS may be accessed
3644             many ways:
3645              
3646             alias AcNum => "Account Number";
3647             apply {
3648              
3649             $crow{"Account Number"} # %crow indexed by title
3650             $crow{AcNum} # using an explicit alias
3651             $crow{Account_Number} # using the AUTOMATIC ALIAS
3652              
3653             $crow[ $colx{"Account Number"} ]; # @crow indexed by a 0-based index
3654             $crow[ $colx{"AcNum"} ]; # ...obtained from %colx
3655             $crow[ $colx{"Account_Number"} ]; #
3656              
3657             $rows[$rx]->[ $colx{Account_Number} ] # Directly accessing @rows
3658              
3659             # See "TIED COLUMN VARIABLES" for a sweeter alternative
3660             };
3661              
3662             =back
3663              
3664             =head2 delete_col COLSPEC ;
3665              
3666             =head2 delete_cols COLSPEC+ ;
3667              
3668             The indicated columns are removed. Remaining title bindings
3669             are adjusted to track shifted columns.
3670              
3671             =head2 only_cols COLSPEC+ ;
3672              
3673             All columns I<except> the specified columns are deleted.
3674              
3675             =head2 move_col POSITION, SOURCE ;
3676              
3677             =head2 move_cols POSITION, SOURCES... ;
3678              
3679             Relocate the indicated column(s) (C<SOURCES>) so they are adjacent, in
3680             the order specified, starting at the position C<POSITION>.
3681              
3682             POSITION may be ">COLSPEC" to place moved column(s)
3683             immediately after the indicated column (">$" to place at the end),
3684             or POSITION may directly specify the destination column
3685             using an unadorned COLSPEC.
3686              
3687             A non-absolute COLSPEC indicates the I<initial> position of the referenced column.
3688              
3689             =head2 insert_col POSITION, newtitle ;
3690              
3691             =head2 insert_cols POSITION, newtitles... ;
3692              
3693             One or more columns are created starting at a position
3694             specified the same way as in C<move_cols> (later columns
3695             are moved rightward).
3696              
3697             POSITION may be ">$" to place new column(s) at the far right.
3698              
3699             A new title must be specified for each new column.
3700             If there is no title row, specify C<undef> for each position.
3701              
3702             Returns the new column index or indices.
3703              
3704             =head2 split_col {code} COLSPEC, POSITION, newtitles... ;
3705              
3706             New columns are created starting at POSITION as with C<insert_cols>,
3707             and populated with data from column COLSPEC.
3708              
3709             C<{code}> is called for each row with $_ bound to the cell at COLSPEC
3710             and @_ bound to cell(s) in the new column(s). It is up to your code to
3711             read the old column ($_) and write into the new columns (@_).
3712              
3713             The old column is left as-is (not deleted).
3714              
3715             If there is no title row, specify C<undef> for each new title.
3716              
3717             =head2 sort_rows {rx cmp function}
3718              
3719             =head2 sort_rows {rx cmp function} $first_rx, $last_rx
3720              
3721             If no range is specified, then the range is the
3722             same as for C<apply> (namely: All rows after the title row unless
3723             limited by B<first_data_rx> .. B<last_data_rx>).
3724              
3725             In the comparison function, globals $a and $b will contain row objects, which
3726             are dual-typed to act as either an array or hash ref to the cells
3727             in their row. The corresponding original row indicies are also passed
3728             as parameters in C<@_>.
3729              
3730             Rows are not actually moved until after all comparisons have finished.
3731              
3732             RETURNS: A list of the previous row indicies of all rows in the sheet.
3733              
3734             # Sort on the "LName" column using row indicies
3735             # (contrast with the example in SYNOPSIS which uses $a and $b)
3736             sort_rows { my ($rxa, $rxb) = @_;
3737             $rows[$rxa]{LName} cmp $rows[$rxb]{LName}
3738             };
3739              
3740             =head2 rename_cols COLSPEC, "new title", ... ;
3741              
3742             Multiple pairs may be given. Title cell(s) are updated as indicated.
3743              
3744             Existing user-defined aliases are I<not> affected, i.e.,
3745             they continue to refer to the same columns as before even if the titles changed.
3746              
3747             =head2 join_cols_sep STRING COLSPEC+ ;
3748              
3749             =head2 join_cols {code} COLSPEC+ ;
3750              
3751             The specified columns are combined into the first-specified column and the other
3752             columns are deleted.
3753              
3754             The first argument of C<join_cols_sep> should be a fixed separator.
3755             The first argument of C<join_cols> may be a {code} block or subref;
3756              
3757             If a separator string is specified it is used to join column content
3758             together.
3759              
3760             If a {code} block or sub ref is specified,
3761             it is executed once for each row following the title row,
3762             with $_ bound to the first-named column, i.e. the surviving column,
3763             and @_ bound to all named columns in the order given.
3764              
3765             It is up to your code to combine the data by reading
3766             @_ and writing $_ (or, equivalently, by writing $_[0]).
3767              
3768             C<first_data_rx> and C<last_data_rx> are ignored, and the title
3769             is I<not> modified.
3770              
3771             =head2 reverse_cols
3772              
3773             The order of the columns is reversed.
3774              
3775             =head2 insert_row
3776              
3777             =head2 insert_row 'END' [,$count]
3778              
3779             =head2 insert_rows $rowx [,$count]
3780              
3781             Insert one or more empty rows at the indicated position
3782             (default: at end). C<$rowx>, if specified, is either a 0-based offset
3783             for the new row or 'END' to add the new row(s) at the end.
3784             Returns the index of the first new row.
3785              
3786             =head2 delete_rows $rowx,... ;
3787              
3788             =head2 delete_rows 'LAST',... ;
3789              
3790             The indicated data rows are deleted. C<$rowx> is a zero-based row index
3791             or the special token "LAST" to indicate the last row (same as C<$#rows>).
3792             Any number of rows may be deleted in a single command, listed individually.
3793              
3794             =for Pod::Coverage delete_row
3795              
3796             =head2 transpose
3797              
3798             Invert the relation, i.e. rotate and flip the table.
3799             Cells A1,B1,C1 etc. become A1,A2,A3 etc.
3800             Any title_rx is forgotten.
3801              
3802             =head2 $href = read_workbook SPREADSHEETPATH
3803              
3804             **NOT YET IMPLEMENTED**
3805              
3806             [Function only, not callable as a method]
3807             All sheets in the specified document are read into memory
3808             without changing the 'current sheet'. A hashref is returned:
3809              
3810             {
3811             "sheet name" => (Spreadsheet::Edit object),
3812             ...for each sheet in the workbook...
3813             }
3814              
3815             To access one of the workbook sheets, execute
3816              
3817             sheet $href->{"sheet name"}; # or call OO methods on it
3818              
3819             If SPREADSHEETPATH was a .csv file then the resulting hash will have only
3820             one member with an indeterminate key.
3821              
3822             =head2 new_sheet
3823              
3824             [functional API only]
3825             Create a new empty sheet and make it the 'current sheet', returning the
3826             sheet object.
3827              
3828             Rarely used because a new sheet is automatically created by
3829             C<read_spreadsheet> if your package has no current sheet.
3830              
3831             {OPTIONS} may include:
3832              
3833             =over 6
3834              
3835             =item data_source => "text..."
3836              
3837             This string will be returned by the C<data_source> method,
3838             overriding any default.
3839              
3840             =item rows => [[A1_value,B1_value,...], [A2_value,B2_value,...], ...],
3841              
3842             =item linenums => [...] #optional
3843              
3844             This makes the C<sheet> object hold data already in memory.
3845             The data should not be modified directly while the sheet C<object> exists.
3846              
3847             =item clone => $existing_sheet
3848              
3849             A deep copy of an existing sheet is made.
3850              
3851             =item num_cols => $number # with no initial content
3852              
3853             An empty sheet is created but with a fixed number of columns.
3854             When rows are later created they will be immediately padded with empty cells
3855             if necessary to this width.
3856              
3857             =back
3858              
3859              
3860             =head2 write_csv *FILEHANDLE
3861              
3862             =head2 write_csv $path
3863              
3864             Write the current data to the indicated path or open file handle as
3865             a CSV text file.
3866             The default encoding is UTF-8 or, if C<read_spreadsheet> was most-recently
3867             used to read a csv file, the encoding used then.
3868              
3869             {OPTIONS} may include
3870              
3871             =over 6
3872              
3873             =item options for Text::CSV
3874              
3875             Usually none need be specified because we supply sane defaults.
3876              
3877             =item silent => bool
3878              
3879             =item verbose => bool
3880              
3881             =item debug => bool
3882              
3883             =back
3884              
3885             =head2 write_spreadsheet OUTPUTPATH
3886              
3887             Write the current data to a spreadsheet (.ods, .xlsx, etc.) by
3888             first writing to a temporary CSV file and then importing that file into
3889             a new spreadsheet.
3890              
3891             {OPTIONS} may include
3892              
3893             =over 6
3894              
3895             =item col_formats => [ LIST ]
3896              
3897             EXPERIMENTAL, likely to change when Spreadsheet::Read is integrated!
3898              
3899             Elements of LIST may be "" (Standard), "Text", "MM/DD/YY", "DD/MM/YY", or
3900             "YY/MM/DD" to indicate the format of the corresponding column. The meaning
3901             of "Standard" is not well documented but appears to mean "General Number"
3902             in most cases. For details, see "Format Codes" in L<this old Open Office
3903             documentation|https://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options#Filter_Options_for_the_CSV_Filter>.
3904              
3905             =item silent => bool
3906              
3907             =item verbose => bool
3908              
3909             =item debug => bool
3910              
3911             =back
3912              
3913              
3914             =head2 options NAME => EXPR, ... ;
3915              
3916             =head2 options NAME ;
3917              
3918             Set or retrieve miscellaneous sheet-specific options.
3919             When setting, the previous value of
3920             the last option specified is returned. The only options currently defined
3921             are I<silent>, I<verbose> and I<debug>.
3922              
3923             =head2 $hash = attributes ;
3924              
3925             Returns a reference to a hash in which you may store arbitrary data
3926             associated with the sheet object.
3927              
3928             =head2 spectocx COLSPEC or qr/regexp/, ... ;
3929              
3930             Returns the 0-based indicies of the specified colomn(s).
3931             Throws an exception if there is no such column.
3932             A regexp may match multiple columns.
3933             See also C<%colx>.
3934              
3935             =head2 logmsg [FOCUSARG,] string, string, ...
3936              
3937             (must be explicitly imported)
3938              
3939             Concatenate strings, prefixed by a description
3940             of the 'current sheet' and row during C<apply>, if any (or with the
3941             sheet and/or row given by FOCUSARG).
3942              
3943             The resulting string is returned, with "\n" appended if it was not
3944             already terminated by a newline.
3945              
3946             The first argument is used as FOCUSARG if it is
3947             a sheet object, [sheet_object], or [sheet_object, rowindex], and specifies
3948             the sheet and/or row to describe in the message prefix.
3949             Otherwise the first argument is not special and is simply
3950             the first message string.
3951              
3952             The details of formatting the sheet may be customized with a call-back
3953             given by a C<{logmsg_pfx_gen}> attribute. See comments
3954             in the source for how this works.
3955              
3956             =head1 STANDARD SHEET VARIABLES
3957              
3958             These variables magically access the 'current sheet' in your package.
3959              
3960             =over
3961              
3962             =item @rows
3963              
3964             The spreadsheet data as an array of row objects.
3965              
3966             Each row object is "dual-typed" (overloaded) to act as either an ARRAY or HASH
3967             reference to the cells in that row.
3968              
3969             When used as a HASH ref, the key may be a
3970             alias, column title, letter-code etc. (any COLSPEC).
3971             When used as an ARRAY ref, the 0-based index specifies the column.
3972              
3973             =item @linenums
3974              
3975             The first line numbers of corresponding rows (a row can contain
3976             multiple lines if cells contain embedded newlines). Valid only if
3977             the data came from a CSV file.
3978              
3979             =item $num_cols
3980              
3981             The number of columns in the widest input row. Shorter rows are
3982             padded with empty cells when read so that all rows have the same number
3983             of columns in memory.
3984              
3985             =item $title_rx and $title_row
3986              
3987             C<$title_rx> contains the 0-based row index of the title row
3988             and C<$title_row> is an alias for C<$rows[ $title_rx ]>.
3989              
3990             The title row is auto-detected by default.
3991             See C<read_spreadsheet> and C<title_rx> for how to control this.
3992              
3993             If a column title is modified, set C<$title_rx = undef;> to force re-detection.
3994              
3995             =item $first_data_rx and $last_data_rx
3996              
3997             Optional limits on the range of rows visited by C<apply()>
3998             or sorted by C<sort_rows()>. By default $first_data_rx
3999             is the first row following the title row (or 0 if no title row).
4000              
4001             =item %colx (column key => column index)
4002              
4003             C<< %colx >> maps aliases, titles, etc. (all currently-valid COLSPECs)
4004             to the corresponding zero-based column indicies. See "COLSPECS" .
4005              
4006             =item %colx_desc (column key => "debugging info")
4007              
4008             =back
4009              
4010             =head1 COLSPECs (COLUMN SPECIFIERS)
4011              
4012             Arguments which specify columns may be:
4013              
4014             =over
4015              
4016             =item (1) a user-defined alias identifier
4017              
4018             =item (2) an actual "column title" **
4019              
4020             =item (3) an actual title with any leading & trailing spaces removed *
4021              
4022             =item (4) an AUTOMATIC ALIAS identifier *
4023              
4024              
4025             =item (6) a Regexp (qr/.../) which matches an actual title
4026              
4027             =item (7) a numeric column index (0-based)
4028              
4029             =item (8) '^' or '$' (means first or last column, respectively)
4030              
4031              
4032             =back
4033              
4034             *These may only be used if they do not conflict with an
4035             item listed higher up.
4036              
4037             **Titles may be used directly if they can not be confused with
4038             a user-defined alias, the special names '^' or '$' or a numeric
4039             column index. See "CONFLICT RESOLUTION".
4040              
4041             B<AUTOMATIC ALIASES> are Perl I<identifiers> derived from column titles by
4042             first removing leading or trailing spaces, and then
4043             replacing non-word characters with underscores and prepending
4044             an underscore if necessary.
4045             For example:
4046              
4047             Title Automatic Alias
4048              
4049             "Address" Address (no change needed)
4050             " First Name " First_Name
4051             "First & Last" First___Last
4052             "+sizes" _sizes
4053             "1000s" _1000s (underscore avoids leading digit)
4054              
4055             Aliases (both automatic and user-defined) are valid identifiers,
4056             so can be used as the names of tied variables,
4057             bareword keys to C<%colx> and C<%crow>, and related OO interfaces,
4058              
4059             CONFLICT RESOLUTION
4060              
4061             A conflict occurs when a column key potentially refers to multiple
4062             columns. For example, "A", "B" etc. are standard column
4063             names, but they might also be the actual titles of other columns.
4064             Warnings are printed about conflicts unless the B<silent> option
4065             is true (see C<options>).
4066              
4067             =over
4068              
4069             B<User alias identifiers> (defined using C<alias>) are always valid.
4070              
4071             '^' and '$' always refer to the first and last column.
4072              
4073             Numeric "names" 0, 1, etc. always give a 0-based column index
4074             if the value is between 0 and num_cols (i.e. one past the end).
4075              
4076             B<Actual Titles> refer to to their columns, except if they:
4077              
4078             =over
4079              
4080             are the same as a user-defined alias
4081              
4082             are '^' or '$'
4083              
4084             consist only of digits (without leading 0s) corresponding
4085             to a valid column index.
4086              
4087             =back
4088              
4089             B<Automatic Aliases> and B<Standard column names> ("A", "B", etc.)
4090             are available as column keys
4091             unless they conflict with a user-defined alias or an actual title.
4092              
4093             =back
4094              
4095             Note: To unconditionally refer to numeric titles or titles which
4096             look like '^' or '$', use a Regexp B<qr/.../>.
4097             Automatic Aliases can also refer to such titles if there are no conflicts.
4098              
4099             Column positions always refer to the data before a command is
4100             executed. This is relevant for commands which re-number or delete columns.
4101              
4102             =head1 OO DESCRIPTION (OBJECT-ORIENTED INTERFACE)
4103              
4104             All the Functions listed above (except for C<new_sheet>) have
4105             corresponding methods with the same arguments.
4106              
4107             However method arguments must be enclosed in parenthesis
4108             and bare {code} blocks may not be used; a sub{...} ref
4109             should be passed to C<apply> etc.
4110              
4111             =head1 OO-SPECIFIC METHODS
4112              
4113             =head2 Spreadsheet::Edit->new(OPTIONS...)
4114              
4115             Creates a new "sheet" object.
4116              
4117             OPTIONS are the same as described for the C<new_sheet> Function above,
4118             except that they may be specified as key => value pairs of arguments
4119             instead of (or in addition to) an {OPTIONS} hashref.
4120              
4121             =head2 $sheet->rows() ; # Analogous to to \@rows
4122              
4123             =head2 $sheet->linenums() ; # Analogous to \@linenums
4124              
4125             =head2 $sheet->num_cols() ; # Analogous to $num_cols
4126              
4127             =head2 $sheet->colx() ; # Analogous to \%colx
4128              
4129             =head2 $sheet->colx_desc() ; # Analogous to \%colx_desc
4130              
4131             =head2 $sheet->first_data_rx() ; # Analogous to $first_data_rx
4132              
4133             =head2 $sheet->last_data_rx() ; # Analogous to $last_data_rx
4134              
4135             =head2 $sheet->title_rx() ; # Analogous to to $title_rx
4136              
4137             =head2 $sheet->title_row() ; # Analogous to $title_row
4138              
4139             =head2 $sheet->rx() ; # Current rx in apply, analogous to to $rx
4140              
4141             =head2 $sheet->title_rx(rxvalue) ; # Analogous to assigning to $title_rx (changes title row)
4142              
4143             =head2 $sheet->crow(); # Current row in apply (a dual-typed row object)
4144              
4145             =head2 $sheet->linenum() ; # Analogous to to $linenum
4146              
4147             =head2 $sheet->get(rx,ident) ; # Analogous to to $rows[rx]{ident}
4148              
4149             =head2 $sheet->set(rx,ident,value); # Analogous to to $rows[rx]{ident} = value
4150              
4151             =head2 $sheet->data_source(); # Returns "description of sheet" (e.g. path read)
4152              
4153             =head2 $sheet->sheetname(); # valid if input was a spreadsheet, else undef
4154              
4155             =head2
4156              
4157              
4158             =head1 SEE ALSO
4159              
4160             L<Spreadsheet::Edit::Preload>
4161              
4162             =head1 BUGS
4163              
4164             Some vestigial support for formats remains from an earlier implementation,
4165             but this support is likely to be entirely replaced at some point.
4166              
4167             Reading a spreadsheet (but not csv) may fail if I<Libre Office>
4168             or I<Open Office> are currently running for any purpose; this seems to be
4169             a bug or limitation where batch-mode operations share the same profile as
4170             interactive sessions. In any case, I<ssconvert> (gnumeric) will be used
4171             if it is installed, and does not have this limitation.
4172             In the future Spreadsheet::Read might be used instead of external programs,
4173             although it uses Twig and is quite a bit slower.
4174              
4175             =head1 THREAD SAFETY
4176              
4177             Unknown, and probably not worth the trouble to find out.
4178              
4179             =head1 FUTURE IDEAS
4180              
4181             =over 4
4182              
4183             =item Add format-processing support.
4184              
4185             =item Add "column-major" views.
4186              
4187             This would allow accessing a whole column as an array.
4188             Perhaps C<@cols> and C<%cols> would be sets of column arrays
4189             (@cols indexed by column index, %cols indexed by any COLSPEC).
4190             And C<tie_column_vars '@NAME'> would tie user array variables to columns.
4191              
4192             =back
4193              
4194             =head1 AUTHOR
4195              
4196             Jim Avera (jim.avera at gmail)
4197              
4198             =head1 LICENSE
4199              
4200             Public Domain or CC0.
4201              
4202             =for Pod::Coverage meta_info
4203              
4204             =for Pod::Coverage iolayers input_encoding
4205              
4206             =for Pod::Coverage oops btw fmt_sheet fmt_list
4207              
4208             =for Pod::Coverage to_aref to_array to_wanted to_hash
4209              
4210             =for Pod::Coverage tied_varnames title2ident let2cx cx2let
4211              
4212             =cut
4213