File Coverage

blib/lib/Spreadsheet/Edit.pm
Criterion Covered Total %
statement 699 1534 45.5
branch 186 690 26.9
condition 75 365 20.5
subroutine 135 226 59.7
pod 50 63 79.3
total 1145 2878 39.7


line stmt bran cond sub pod time code
1             # License: Public Domain or CC0
2             # See https://creativecommons.org/publicdomain/zero/1.0/
3             # The author, Jim Avera (jim.avera at gmail) has waived all copyright and
4             # related or neighboring rights. Attribution is requested but is not required.
5              
6             # Pod documentation is below (use perldoc to view)
7              
8 3     3   3470591 use 5.18.0; # lexical subs with bug we have worked around
  3         32  
9 3     3   24 use strict; use warnings FATAL => 'all'; use utf8;
  3     3   9  
  3     3   89  
  3         25  
  3         7  
  3         120  
  3         16  
  3         11  
  3         22  
10 3     3   84 no warnings qw(experimental::lexical_subs);
  3         14  
  3         126  
11 3     3   16 use feature qw(state lexical_subs current_sub);
  3         7  
  3         425  
12              
13             package Spreadsheet::Edit;
14              
15             # Allow "use <thismodule> <someversion>;" in development sandbox to not bomb
16 3     3   20 { no strict 'refs'; ${__PACKAGE__."::VER"."SION"} = 1999.999; }
  3         6  
  3         436  
17              
18             our $VERSION = '1000.006'; # VERSION from Dist::Zilla::Plugin::OurPkgVersion
19             our $DATE = '2023-09-06'; # DATE from Dist::Zilla::Plugin::OurDate
20              
21             # FIXME: cmd_nesting does nothing except prefix >s to log messages.
22             # Shouldn't it skip that many "public" call frames???
23              
24             # TODO FIXME: Integrate with Spreadsheet::Read and provide a formatting API
25              
26             # TODO: Allow & support undef cell values (see Text::CSV_XS), used to
27             # represent "NULL" when interfacing with database systems.
28             # OTOH, this conflicts with failed optional alias keys
29              
30             # TODO: Add some way to exit an apply() early, e.g. return value?
31             # or maybe provide an abort_apply(resultval) function
32             # which throws an exception we can catch in apply?
33              
34             # TODO: Use Tie::File to avoid storing entire sheet in memory
35             # (requires seekable, so must depend on OpenAsCsv
36             # copying "-" to a temp file if it is not a plain file).
37              
38             #########################################
39             # sub NAMING CONVENTION:
40             # sub public_method_or_function
41             # sub _internal_method
42             # sub __internal_function
43             #########################################
44              
45             # If these globals are *defined* then the corresponding
46             # (downcased) options default accordingly when new sheets are created.
47             our ($Debug, $Verbose, $Silent);
48              
49             ############################## Exporting stuff ############################
50             #
51 3     3   1592 use Exporter::Tiny 1.001_000 (); # just to require version with non-sub generator support
  3         15863  
  3         113  
52 3     3   32 use parent "Exporter::Tiny"; # make us be a derived class
  3         12  
  3         24  
53              
54             require mro; # makes next::can available
55              
56             sub import {
57             # copied from List::Util
58             # (RT88848) Touch the caller's $a and $b, to avoid the warning of
59             # 'Name "main::a" used only once: possible typo'
60 5     5   556 my $pkg = caller;
61 3     3   365 no strict 'refs';
  3         22  
  3         526  
62 5         12 ${"${pkg}::a"} = ${"${pkg}::a"};
  5         19  
  5         34  
63 5         11 ${"${pkg}::b"} = ${"${pkg}::b"};
  5         15  
  5         17  
64              
65 5         11 my $this = $_[0];
66 5         9 goto &{ $this->next::can }; # see 'perldoc mro'
  5         102  
67             #goto &maybe::next::method ; ???
68             }
69              
70 3     3   27 use Symbol qw/gensym/;
  3         6  
  3         4924  
71              
72             our @EXPORT = qw(
73             alias apply apply_all apply_exceptrx apply_torx attributes
74             spectocx data_source delete_col delete_cols delete_row delete_rows
75             first_data_rx transpose join_cols join_cols_sep last_data_rx move_col
76             move_cols insert_col insert_cols insert_row insert_rows new_sheet only_cols
77             options read_spreadsheet rename_cols reverse_cols
78             sheet sheetname sort_rows split_col tie_column_vars title2ident title_row
79             title_rx unalias write_csv write_spreadsheet );
80              
81             my @stdvars = qw( $title_rx $first_data_rx $last_data_rx $num_cols
82             @rows @linenums @meta_info %colx %colx_desc $title_row
83             $rx $linenum @crow %crow );
84              
85             our @EXPORT_OK = (@stdvars, qw/logmsg cx2let let2cx fmt_sheet/);
86              
87             our %EXPORT_TAGS = (
88             STDVARS => [@stdvars],
89             FUNCS => [@EXPORT],
90             default => [':FUNCS'],
91             DEFAULT => [':default'],
92             #all => [qw(:STDVARS :FUNCS cx2let let2cx logmsg)],
93             all => [qw(:STDVARS :FUNCS)],
94             );
95              
96             # Although the magic globals $row, $rx and friends are "imported" by 'use',
97             # they can not simply alias variables in Spreadsheet::Edit becuase they
98             # need to be tied with parameters specific to the user's package
99             # so that each package has its own 'current sheet'.
100             #
101             # To accomplish this, we use Exporter:Tiny's "generator" mechanism to
102             # create a new, unique variables tied appropriately for each import.
103             # See perldoc Exporter::Tiny::Manual::QuickStart .
104             #
105             # If "methods" named _generate_SUBNAME, _generateScalar_SCALARNAME,
106             # _generateArray_ARRAYNAME or _generateHash_HASHNAME exist in the
107             # exporting package (that's us), then they are called to obtain
108             # a ref to an object to export as SUBNAME, SCALARNAME, ARRAYNAME or HASHNAME.
109             #
110             # For example _generateScalar_rx() is called to get a ref to an $rx variable.
111              
112             sub __gen_x {
113 36     36   99 my ($myclass, $sigilname, $args, $globals, # supplied by Exporter::Tiny
114             $Type, $helpersub, @args) = @_;
115 36 100       94 my $sigl = $Type eq 'Hash' ? '%' : $Type eq 'Array' ? '@' : '$';
    100          
116 36         66 my $tieclassname = 'Tie::Indirect::'.$Type;
117 36         52 my $ref = *{gensym()}{uc($Type)}; # e.g. "ARARY"
  36         113  
118             # e.g. tie $$ref, 'Tie::Indirect::Scalar', \&_scal_tiehelper, ...
119 36         4027 eval "tie ${sigl}\$ref, \$tieclassname, \$helpersub,
120             \$globals->{into}, \$sigilname, \@args";
121 36 50       722 die $@ if $@;
122 36         237 $ref
123             }
124             sub __gen_scalar {
125 12     12   37 my ($myclass, $sigilname, $args, $globals,
126             $canon_ident, $onlyinapply) = @_;
127 12         31 __gen_x($myclass, $sigilname, $args, $globals,
128             "Scalar", \&Spreadsheet::Edit::_scal_tiehelper,
129             $canon_ident, $onlyinapply);
130             }
131 3     3   282 sub _generateScalar_num_cols { __gen_scalar(@_, "num_cols") }
132 3     3   314 sub _generateScalar_first_data_rx { __gen_scalar(@_, "first_data_rx") }
133 3     3   295 sub _generateScalar_last_data_rx { __gen_scalar(@_, "last_data_rx") }
134 3     3   276 sub _generateScalar_rx { __gen_scalar(@_, "current_rx", 1) }
135              
136             #sub _generateScalar_title_rx { __gen_scalar(@_, "title_rx") }
137             sub _generateScalar_title_rx {
138             __gen_x(@_, "Scalar",
139             sub{
140 0     0   0 my ($mutating, $pkg, $uvar, $onlyinapply)=@_;
141 0         0 my $sheet = &Spreadsheet::Edit::__getsheet(@_);
142             \$$sheet->{title_rx}
143 0     3   0 }, 0 # onlyinapply
  3         6091  
144             )
145             }
146              
147             sub __gen_aryelem {
148 6     6   18 my ($myclass, $sigilname, $args, $globals,
149             $index_ident, $array_ident, $onlyinapply) = @_;
150             # N.B. _aryelem_tiehelper has special logic for 'current_rx' and 'title_rx'
151 6         20 __gen_x($myclass, $sigilname, $args, $globals,
152             "Scalar", \&Spreadsheet::Edit::_aryelem_tiehelper,
153             $index_ident, $array_ident, $onlyinapply);
154             }
155 3     3   268 sub _generateScalar_title_row { __gen_aryelem(@_, "title_rx", "rows") }
156 3     3   248 sub _generateScalar_linenum { __gen_aryelem(@_, "current_rx", "linenums", 1) }
157              
158             sub __gen_hash {
159 6     6   18 my ($myclass, $sigilname, $args, $globals,
160             $field_ident, $onlyinapply) = @_;
161 6         34 __gen_x($myclass, $sigilname, $args, $globals,
162             "Hash", \&Spreadsheet::Edit::_refval_tiehelper,
163             $field_ident, $onlyinapply, 0); # mutable => 0
164             }
165 3     3   297 sub _generateHash_colx { __gen_hash(@_, "colx", 0) }
166 3     3   280 sub _generateHash_colx_desc { __gen_hash(@_, "colx_desc", 0) }
167              
168             sub __gen_array{
169 9     9   24 my ($myclass, $sigilname, $args, $globals,
170             $field_ident, $onlyinapply, $mutable) = @_;
171 9         31 __gen_x($myclass, $sigilname, $args, $globals,
172             "Array", \&Spreadsheet::Edit::_refval_tiehelper,
173             $field_ident, $onlyinapply, $mutable);
174             }
175 3     3   370 sub _generateArray_rows { __gen_array(@_, "rows", 0, 1) }
176              
177             # Currently @linenums is not mutable but maybe it should be?
178 3     3   294 sub _generateArray_linenums { __gen_array(@_, "linenums", 0) }
179              
180             ## FIXME: is meta_info still valid?
181 3     3   298 sub _generateArray_meta_info { __gen_array(@_, "meta_info", 0) }
182              
183             sub __get_currentrow {
184 0     0   0 my ($mutating, $pkg, $uvar) = @_;
185             # Ignore mutating, as it applies to the element not the container
186 0         0 my $sheet = &Spreadsheet::Edit::__getsheet(0, $pkg, $uvar, 1);
187             # Returns the dual-typed ref (Magicrow) for the current row
188 0         0 $$sheet->{rows}->[$$sheet->{current_rx}]
189             }
190             sub _generateArray_crow { # @crow aliases the current row during apply
191 3     3   288 my ($myclass, $sigilname, $args, $globals) = @_;
192 3         7 my $aref = *{gensym()}{ARRAY};
  3         10  
193             tie @$aref, 'Tie::Indirect::Array',
194 3         56 \&__get_currentrow, $globals->{into}, $sigilname ;
195 3         38 $aref
196             }
197             sub _generateHash_crow { # %crow indexes cells in the current row during apply
198 3     3   240 my ($myclass, $sigilname, $args, $globals) = @_;
199 3         7 my $href = *{gensym()}{HASH};
  3         74  
200             tie %$href, 'Tie::Indirect::Hash',
201 3         55 \&__get_currentrow, $globals->{into}, $sigilname ;
202 3         38 $href
203             }
204             #
205             ########################### End of Exporting stuff ##########################
206              
207 3         14 use Spreadsheet::Edit::Log qw/log_call fmt_call log_methcall fmt_methcall oops/,
208 3     3   1717 ':btw=SE${lno}:' ;
  3         9  
209              
210 3     3   24 use Data::Dumper ();
  3         7  
  3         86  
211 3     3   15 use Data::Dumper::Interp 6.002 qw/:all/;
  3         86  
  3         19  
212              
213 3     3   79538 use Carp;
  3         7  
  3         358  
214             our @CARP_NOT = qw(Spreadsheet::Edit
215             Spreadsheet::Edit::IO
216             Tie::Indirect::Array Tie::Indirect::Hash
217             Tie::Indirect::Scalar
218             );
219              
220 3     3   25 use Scalar::Util qw(looks_like_number openhandle reftype refaddr blessed);
  3         8  
  3         243  
221 3     3   23 use List::Util qw(min max sum0 first any all pairs pairgrep);
  3         7  
  3         299  
222 3     3   871 use File::Temp qw(tempfile tempdir);
  3         19499  
  3         237  
223 3     3   32 use File::Basename qw(basename dirname fileparse);
  3         6  
  3         173  
224 3     3   19 use File::Find ();
  3         7  
  3         78  
225 3     3   16 use Symbol qw(gensym);
  3         6  
  3         170  
226 3     3   21 use POSIX qw(INT_MAX);
  3         5  
  3         44  
227 3     3   791 use Guard qw(scope_guard);
  3         524  
  3         252  
228              
229             require Tie::Indirect; # OUR CUSTOM STUFF: stop using it?
230              
231 3     3   2820 use Text::CSV 1.90_01; # 1st version with known_attributes()
  3         55445  
  3         200  
232 3         640 use Spreadsheet::Edit::IO qw(
233             OpenAsCsv @sane_CSV_read_options @sane_CSV_write_options
234             convert_spreadsheet
235 3     3   2384 sheetname_from_spec filepath_from_spec form_spec_with_sheetname);
  3         13  
236              
237 3   33     243 use constant _CALLER_OVERRIDE_CHECK_OK =>
238             (defined(&Carp::CALLER_OVERRIDE_CHECK_OK)
239 3     3   29 && &Carp::CALLER_OVERRIDE_CHECK_OK);
  3         6  
240              
241             # sub __mytraceback() { ... } archived in commit 482e09b6009
242              
243 3     3   18 use constant DEFAULT_WRITE_ENCODING => 'UTF-8';
  3         6  
  3         28218  
244             #use constant DEFAULT_READ_ENCODINGS => 'UTF-8,windows-1252';
245              
246             # This global is used by logmsg() to infer the current sheet if an apply is
247             # active, even if logmsg is called indirectly via another pkg
248             our $_inner_apply_sheet; # see &_apply_to_rows
249              
250             # The "current sheet", to which tied globals refer in any given package.
251             our %pkg2currsheet;
252              
253 24     24   41 sub __looks_like_aref($) { eval{ 1+scalar(@{$_[0]}) } } #actual or overloaded
  24         32  
  24         70  
254              
255             # Utility FUNCTIONS
256             #
257             sub to_array(@) { @_ != 1 ? @_ # 0 or multiple values
258 0         0 : ref($_[0]) eq "ARRAY" ? @{$_[0]}
259 4 0   4 0 23 : ref($_[0]) eq "HASH" ? @{ %{$_[0]} } # (key, value, ...)
  0 0       0  
  0 50       0  
260             : $_[0] # just 1 value
261             }
262 0     0 0 0 sub to_aref(@) { [ to_array(@_) ] }
263 0 0   0 0 0 sub to_wanted(@) { goto &to_array if wantarray; goto &to_aref }
  0         0  
264              
265             sub to_hash(@) {
266 4 50 33 4 0 24 @_==1 && ref($_[0]) eq "HASH" ? $_[0] :
    50          
267             (@_ % 2)!=0 ? croak("odd arg count, expecting key => value pairs") :
268             { to_array(@_) }
269             }
270              
271             sub cx2let(_) { # default arg is $_
272 52     52 0 82 my $cx = shift;
273 52         65 my $ABC="A"; ++$ABC for (1..$cx);
  52         129  
274 52         139 return $ABC
275             }
276             sub let2cx(_) {
277 0     0 0 0 my $ABC = shift;
278 0         0 my $n = ord(substr($ABC,0,1,"")) - ord('A');
279 0         0 while (length $ABC) {
280 0         0 my $letter = substr($ABC,0,1,"");
281 0         0 $n = (($n+1) * 26) + (ord($letter) - ord('A'));
282             }
283 0         0 return $n;
284             }
285              
286             # Produce the "automatic alias" identifier for an arbitrary title
287             sub title2ident($) {
288 32     32 0 56 local $_ = shift;
289 32         64 s/^\s+//; s/\s+$//; s/\W/_/g; s/^(?=\d)/_/;
  32         54  
  32         60  
  32         50  
290 32         88 $_
291             }
292              
293             # Format list as "word,word,..." without parens ; Non-barewords are "quoted".
294 0     0   0 sub __fmt_uqlist(@) { join(",",map{quotekey} @_) }
  0         0  
295 0     0   0 sub __fmt_uqarray(@) { "(" . &__fmt_uqlist . ")" }
296              
297             # Format list as without parens with barewords in qw/.../
298             sub __fmt_uqlistwithqw(@) {
299 0     0   0 my $barewords;
300 0         0 my $s = "";
301 0         0 foreach (map{quotekey} @_) {
  0         0  
302 0 0       0 if (/^\w/) {
303 0 0       0 if ($barewords++) {
304 0         0 $s .= " ";
305             } else {
306 0 0       0 $s .= ", " if $s;
307 0         0 $s .= "qw/";
308             }
309             } else {
310 0 0       0 if ($barewords) {
311 0         0 $s .= "/";
312 0         0 $barewords = 0;
313             }
314 0 0       0 $s .= ", " if $s;
315             }
316 0         0 $s .= $_;
317             }
318 0 0       0 $s .= "/" if $barewords;
319 0         0 $s
320             }
321 0     0   0 sub __fmt_uqarraywithqw(@) { "(" . &__fmt_uqlistwithqw . ")" }
322              
323             # Format list of pairs as "key1 => val1, key2 => val2, ..." without parens
324             sub __fmt_pairlist(@) {
325 0     0   0 my $result = "";
326 0         0 while (@_) {
327 0 0       0 confess "Odd arg count, expecting key => value pairs" if @_==1;
328 0 0       0 $result .= ", " if $result;
329 0         0 my $key = shift @_;
330 0         0 my $val = shift @_;
331 0         0 $result .= quotekey($key)." => ".vis($val);
332             }
333             $result
334 0         0 }
335             sub __fmt_pairs(@) {
336 0     0   0 __fmt_pairlist( map{ @$_ } @_ );
  0         0  
337             }
338              
339             # Concatenate strings separated by spaces, folding as necessary
340             # (strings are never broken; internal newlines go unnoticed).
341             # All lines (including the first) are indented the specified number of
342             # spaces. Explicit line-breaks may be included as "\n".
343             # A final newline is *not* included unless the last item ends with "\n".
344             sub __fill($;$$) {
345 0     0   0 my ($items, $indent, $foldwidth) = @_;
346 0   0     0 $indent //= 4;
347 0   0     0 $foldwidth //= 72;
348 0         0 my $buf = "";
349 0         0 my $llen = 0;
350 0         0 foreach (@$items) {
351 0 0 0     0 if ($_ eq "\n" or
      0        
352             ($llen > $indent && ($llen + length($_)) > $foldwidth)) {
353 0         0 $buf .= "\n";
354 0         0 $llen = 0;
355 0 0       0 next if $_ eq "\n";
356             }
357 0 0       0 if ($llen == 0) {
358 0         0 $buf .= (" " x $indent);
359 0         0 $llen = $indent;
360             } else {
361 0 0       0 if (substr($buf,-1) =~ /\S/) {
362 0         0 $buf .= " ";
363 0         0 ++$llen;
364             }
365             }
366 0         0 $buf .= $_;
367 0         0 $llen += length();
368             }
369 0         0 $buf;
370             }
371              
372             sub __fmt_colspec_cx($$) {
373             # "cx NN" or "COLSPEC [cx NN]" or "COLSPEC (NOT DEFINED)" if undef cx
374 0     0   0 my ($colspec, $cx) = @_;
375 0 0       0 if (ref($colspec) eq "Regexp") {
376 0         0 state $delimsets = [
377             [qw(/ /)], [qw({ })], [qw([ ])], [qw<( )>], [qw(< >)], [qw(« »)] ];
378 0         0 for (@$delimsets) {
379 0         0 my ($left, $right) = @$_;
380 0 0 0     0 if (index($colspec,$left)<0 && index($colspec,$right)<0) {
381 0         0 $colspec = "qr${left}${colspec}${right}";
382 0         0 last;
383             }
384             }
385             } else {
386 0         0 $colspec = visq($colspec);
387             }
388 0 0       0 return "$colspec (NOT DEFINED)"
389             if ! defined $cx;
390 0 0       0 $colspec eq "$cx" ? "cx $cx" : "$colspec [cx $cx]"
391             }
392             sub __fmt_cx($) {
393 28     28   62 my ($cx) = @_;
394 28 50       77 defined($cx) ? "cx $cx=".cx2let($cx) : "(undefined)"
395             }
396              
397             # Format %colx keys "intelligently". cx values are not shown for keys which are
398             # absolute column refs. Keys with undef values (from alias {optional => 1})
399             # are omitted since they are not currently valid. A final newline IS included.
400             sub _fmt_colx(;$$) {
401             my $self = shift;
402             my ($indent, $foldwidth) = @_;
403             my ($colx, $num_cols) = @$$self{qw{colx num_cols}};
404             # copy %$colx omitting keys with undef cx
405             my %hash = map{ defined($colx->{$_}) ? ($_ => $colx->{$_}) : () } keys %$colx;
406             my sub sortbycx(@) { sort { ($colx->{$a}//-1) <=> ($colx->{$b}//-1) } @_ }
407             my sub subset($) { # format items, deleting from %hash
408             my $specs = shift;
409             my (@items, $curr, $curr_desc);
410             my $curr_cx = -1;
411             my sub flush() {
412             return unless $curr_cx >= 0;
413             push @items, $curr.$curr_desc;
414             $curr = $curr_desc = undef; ##DEBUGGING
415             $curr_cx = -1;
416             }
417             my sub additem($$) {
418             (local $_, my $cx) = @_;
419             flush() if $curr_cx != $cx;
420             if ($curr_cx >= 0) {
421             $curr .= ",".quotekey($_);
422             } else {
423             $curr_cx = $cx;
424             $curr = quotekey($_);
425             my $misfit = (/^[A-Z]{1,2}$/ && $colx->{$_} != let2cx($_))
426             || (/^\d+$/ && $colx->{$_} != $_)
427             || (/^\D../) # show titles with cx too
428             ;
429             $curr_desc = $misfit ? "(cx ".vis($hash{$_}).")" : "";
430             }
431             }
432             foreach (@$specs) {
433             if (ref $_) {
434             push @items, $$_; # \"string" means insert "string" literally
435             } else {
436             # Work around old Perl lexical sub limitations...
437             my $item=$hash{$_};
438             oops(dvis '$_ $specs hash=',hvis(%hash)) unless defined $item;
439             additem($_, $item);
440             delete $hash{$_} // oops;
441             }
442             }
443             flush();
444             push @items, "\n" if @items; # wrap before next subset, or at end
445             @items
446             }
447             my @ABCs = subset [ map{ my $A = cx2let($_);
448             u($hash{$A}) eq $_ ? $A : \" "
449             } 0..$num_cols-1 ];
450              
451             # More lexical sub bug work-arounds...
452             my @ss1 = subset [sortbycx grep{ /^(=.*\D)\w+$/ } keys %hash]; # normal titles
453             my @ss2 = subset [sortbycx grep{ /^\d+$/ } keys %hash]; # numeric titles
454             my @ss3 = subset [sortbycx keys %hash]; # oddities
455             __fill [
456             @ABCs,
457             @ss1,
458             @ss2,
459             @ss3,
460             ], $indent, $foldwidth;
461             }
462              
463             # Is a title a special symbol or looks like a cx number?
464             sub __unindexed_title($$) {
465 14     14   28 my ($title, $num_cols) = @_;
466 14 50       27 oops unless defined $title;
467 14 50 33     143 $title eq ""
      33        
      33        
      33        
468             || $title eq '^'
469             || $title eq '$'
470             || ( ($title =~ /^[1-9]\d*$/ || $title eq "0") # not with leading zeros
471             && $title <= $num_cols )
472             }
473             sub _unindexed_title { #method for use by tests
474 0     0   0 my $self = shift;
475 0         0 __unindexed_title(shift(), $$self->{num_cols});
476             }
477              
478             # Return (normals, unindexed) where each is [title => cx, ...] sorted by cx
479             sub _get_usable_titles {
480 6     6   12 my $self = shift;
481 6         20 my ($rows, $title_rx, $num_cols) = @$$self{qw{rows title_rx num_cols}};
482 6   33     28 my $title_row = $rows->[$title_rx // oops];
483 6         21 my @unindexed;
484             my @normals;
485 6         0 my %seen;
486 6         21 for my $cx (0 .. $num_cols-1) {
487 14         36 my $title = $title_row->[$cx];
488 14 50       44 next if $title eq "";
489 14 50       50 if ($seen{$title}++) {
490 0 0       0 $self->_carponce("Warning: Non-unique title ", visq($title), " will not be usable for COLSPEC\n") unless $$self->{silent};
491 0         0 @normals = grep{ $_->[0] ne $title } @normals;
  0         0  
492 0         0 @unindexed = grep{ $_->[0] ne $title } @unindexed;
  0         0  
493 0         0 next;
494             }
495 14 50       40 if (__unindexed_title($title, $num_cols)) {
496 0         0 push @unindexed, [$title, $cx];
497             } else {
498 14         44 push @normals, [$title, $cx];
499             }
500             }
501 9         47 [sort { $a->[1] <=> $b->[1] } @normals],
502 6         42 [sort { $a->[1] <=> $b->[1] } @unindexed]
  0         0  
503             }
504              
505             # Non-OO api: Explicitly create a new sheet and make it the "current sheet".
506             # Options (e.g. to specify initial content) may be specified in an
507             # initial {OPTHASH} and/or as linear key => value pairs.
508             #
509             # Note: Most functions automatically create an empty sheet if no sheet
510             # exists, so this is only really needed when using more than one sheet
511             # or if you want to initialize a sheet from data in memory.
512             # N.B. the corresponding OO interface is Spreadsheet::Edit->new(...)
513             #
514             sub new_sheet(@) {
515 2     2 1 9578 my $opthash = &__opthash;
516 2         6 my %opts = (%$opthash, %{to_hash(@_)}); # new() merges these anyway
  2         6  
517              
518 2         5 my ($userpkg, $fn, $lno, $subname) = @{ __filter_frame(__usercall_info()) };
  2         7  
519 2 50       11 $userpkg = delete $opts{package} if exists $opts{package};
520              
521 2 50       8 my $pkgmsg = $opts{package} ? " [for pkg $userpkg]" : "";
522 2 50 33     17 croak "Invalid 'package' ",u($userpkg),"\n"
523             unless defined($userpkg) && $userpkg =~ /^[a-zA-Z][:\w]*$/a;
524              
525 2   33     7 $opts{data_source} ||= "Created at ${fn}:$lno by $subname";
526              
527 2         7 my $sheet = __silent_new(\%opts);
528              
529             log_call [$opthash, @_], [\(fmt_sheet($sheet), \$pkgmsg)]
530 2 50       8 if $$sheet->{verbose};
531              
532 2         11 $pkg2currsheet{$userpkg} = $sheet
533             }
534              
535             # logmsg() - Concatenate strings to form a "log message", possibly
536             # prefixed with a description of a "focus" sheet and optionally
537             # a specific row. A final \n is appended if needed.
538             #
539             # The "focus" sheet and row, if any, are determined as follows:
540             #
541             # If the first argument is a sheet object, [sheet_object],
542             # [sheet_object, rx], or [sheet_object, undef] then the indicated
543             # sheet and (optionally) row are used. Note that if called as a method
544             # the first arg will be the sheet object.
545             #
546             # Otherwise the first arg is not special and is included in the message.
547             #
548             # If no sheet is identified above, then the caller's package active
549             # sheet is used, if any.
550             #
551             # If still no sheet is identified, then the sheet of the innermost apply
552             # currently executing (anywhere up the stack) is used, if any; this sheet
553             # is internally saved in a global by the apply* methods.
554             #
555             # If a sheet is identified but no specific rx, then the
556             # "current row" of an active apply on that sheet is used, if any.
557             #
558             # If a focus sheet or sheet & row were identified, then the caller-supplied
559             # message is prefixed by "(<description>):" or "(row <num> <description>):"
560             # where <description> comes from:
561             #
562             # 1) If the sheet attribute {logmsg_pfx_gen} is defined to a subref,
563             # the sub is called and all returned items other than undef are
564             # concatenated (any undefs in the returned list are ignored); otherwise
565             #
566             # 2) The "sheetname" property is used, if defined; otherwise
567             #
568             # 3) the "data_source" property is used, which defaults to the name of the
569             # spreadsheet read by read_spreadsheet().
570             #
571             # FIXME: I should either rename logmsg_pfx_gen as logmsg_sheetdesc_gen
572             # to reflect that it only generated the sheet-description part,
573             # or else make prefix generators produce the entire message prefix
574             # including any row number.
575             #
576             sub _default_pfx_gen($$) {
577 274     274   559 my ($sheet, $rx) = @_;
578 274 50       1330 confess "bug" unless ref($sheet) =~ /^Spreadsheet::Edit\b/;
579 274 50       629 ($sheet->sheetname() || $sheet->data_source())
580             }
581             sub logmsg(@) {
582 285     285 1 293098 my ($sheet, $rx);
583 285 100 100     1272 if (@_ > 0 && ref($_[0])) {
584 208 100 33     836 if (ref($_[0]) =~ /^Spreadsheet::Edit\b/) {
    50 33        
585 24         53 $sheet = shift;
586             }
587             elsif (ref($_[0]) eq "ARRAY"
588 184         957 && @{ $_[0] } <= 2
589             && ref($_[0]->[0])."" =~ /^Spreadsheet::Edit\b/) {
590 184         288 ($sheet, $rx) = @{ shift @_ };
  184         420  
591             }
592             }
593 285         642 my $msgstr = join("", grep{defined} @_);
  320         924  
594 285 100       639 if (! defined $sheet) {
595 77         219 $sheet = $pkg2currsheet{scalar(caller)};
596             }
597 285 100       606 if (! defined $sheet) {
598 22         77 $sheet = $Spreadsheet::Edit::_inner_apply_sheet;
599             }
600 285 100       549 if (! defined $rx) {
601 125 100       309 $rx = eval{ $sheet->rx() } if defined($sheet);
  114         291  
602             }
603 285         477 my @prefix;
604 285 100       537 if (defined $sheet) {
605 274         504 push @prefix, "(";
606 274 100 100     755 if (defined($rx) && ($rx < 0 || $rx > $#{$sheet->rows})) {
      100        
607 80         266 push @prefix, "Row ".($rx+1)."[INVALID RX $rx] ";
608 80         123 $rx = undef; # avoid confusing user's prefix generator
609             } else {
610 194 100       555 push @prefix, "Row ".($rx+1)." " if defined($rx);
611             }
612 274   50     664 my $pfxgen = $sheet->attributes->{logmsg_pfx_gen} // \&_default_pfx_gen;
613 274         676 push @prefix, &$pfxgen($sheet, $rx), "): ";
614             }
615 285 100       862 my $suffix = ($msgstr =~ /\n\z/s ? "" : "\n");
616             # If the message string starts with newlines, put them before the prefix
617 285 50       778 if ($msgstr =~ s/\A(\n+)([^\n])/$2/s) {
618 0         0 unshift @prefix, $1;
619             }
620 285         527 return join "", grep{defined} @prefix, $msgstr, $suffix;
  1593         4548  
621             }
622              
623             #####################################################################
624             # Locate the nearest call to a public sub in the call stack.
625             # [Now *GENERIC*]
626             #
627             # A "public" sub means anything named starting with [a-z], irrespective
628             # of package, and not mentioned in $USERCALL_NOT.
629             # The assumption is that internal subs are named with an initial
630             # underscore or are ALLCAPS (e.g. for constants).
631             #
632             # RETURNS
633             # ([frame], [called args]) in array context
634             # [frame] in scalar context
635             #
636             # "frame" means caller(n) results:
637             # 0 1 2 3
638             # package filename linenum subname ...
639             #
640             sub __usercall_info(;$) {
641 15     15   40 for (my $lvl=1 ; ; ++$lvl) {
642 53         114 @DB::args = \$lvl if _CALLER_OVERRIDE_CHECK_OK; # see mytraceback()
643 53         76 my @frame = do{ package
644 53         499 DB; caller($lvl) };
645 53 50       145 oops dvis('$lvl @frame') unless defined($frame[0]);
646 53 100       317 if ( $frame[3] =~ /::([a-z][^:]*)/ ) {
647 15 100       154 return \@frame unless wantarray;
648 2         4 my @args;
649 2         4 my $hasargs = $frame[4];
650 2 50       5 if ($hasargs) {
651 2         4 eval{ @args = @DB::args };
  2         6  
652 2 50       5 @args=("<perl bug(?) prevented getting args>") if $@;
653             }
654 2         11 return (\@frame, \@args)
655             }
656             }
657             }
658              
659             sub __filter_frame($) { #clean-up/abbreviate for display purposes
660 5     5   14 my @frame = @{shift @_};
  5         21  
661 5         195 $frame[1] = basename $frame[1]; # filename
662 5         38 $frame[3] =~ s/.*:://; # subname
663             \@frame
664 5         30 }
665             sub __fn_ln_methname() {
666 3     3   9 @{ __filter_frame(__usercall_info()) }[1,2,3]; # (fn, lno, subname)
  3         11  
667             }
668              
669             sub __methname() {
670 0     0   0 (&__fn_ln_methname())[2]
671             }
672              
673             sub __find_userpkg() {
674 10     10   28 ${ __usercall_info() }[0];
  10         34  
675             }
676              
677             # This always returns the caller's caller's package but also
678             # checks that it is not an internal call, which should never happen
679             sub __callerpkg() {
680 1     1   11 my $pkg = (caller(1))[0];
681 1 50       6 oops if index($pkg,__PACKAGE__) == 0;
682 1         4 $pkg
683             }
684              
685             # Create a new object without allowing any logging. This is used when
686             # new() is called implicitly by something else and new's log messages
687             # might display an internal filename (namely Edit.pm).
688             #
689             # debug/verbose args are removed from the arguments passed to new()
690             # and put back into the object after it is created.
691             sub __silent_new(@) {
692 2     2   6 my $opthash = &__opthash;
693 2         7 my $new_args = to_hash(@_);
694              
695 2         4 my %saved;
696 2         5 foreach my $key (qw/verbose debug/) {
697 4 50       10 $saved{$key} = $opthash->{$key} if exists($opthash->{$key});
698 4         8 $opthash->{$key} = 0; # force off
699 4 50       13 $saved{$key} = delete($new_args->{$key}) if exists($new_args->{$key});
700             }
701              
702 2         14 my $self = Spreadsheet::Edit->new($opthash, %$new_args);
703              
704 2         16 delete @$$self{qw/verbose debug/};
705 2         9 $self->_set_verbose_debug_silent(%saved);
706              
707 2         6 $self
708             }
709              
710             #####################################################################
711             # Get "self" for a function/method combo sub:
712             # If the first arg is an object ref we shift it off and use that
713             # (i.e. assume it is called as a method); otherwise we assume it's a
714             # functional-API function call and use the caller's "current sheet"
715             # (if none exists, __self creates one but __selfmust throws).
716             #
717             # This must be used with special syntax like
718             # my $self = &__self;
719             # which re-uses @_ so we can shift @_ as seen by our caller.
720              
721             sub __self_ifexists {
722              
723             # If the first arg is an object ref, shift it off and return it;
724             # Otherwise, if the caller's "current sheet" exists, return that;
725             # otherwise return undef.
726              
727             (defined(blessed($_[0])) && $_[0]->isa(__PACKAGE__) && shift(@_))
728 1120 100 66 1120   8131 || $pkg2currsheet{__find_userpkg()};
      66        
729             }
730             sub __selfmust { # sheet must exist, otherwise throw
731 841 50   841   1378 &__self_ifexists || do{
732 0         0 my $pkg = caller(1);
733 0         0 croak __methname()," : No sheet is defined in $pkg\n"
734             };
735             }
736              
737             sub __self { # a new empty sheet is created if necessary
738 279 50   279   442 &__self_ifexists || do{
739             # Create a new empty sheet and make it the caller's "current sheet".
740 0         0 my %opts;
741 0         0 my ($frame, $args) = __usercall_info();
742              
743 0         0 my ($userpkg, $fn, $lno, $subname) = @{ __filter_frame($frame) };
  0         0  
744 0         0 $opts{data_source} = "(Created implicitly by '$subname' at ${fn}:$lno)";
745            
746 0         0 my $self = $pkg2currsheet{$userpkg} = __silent_new(\%opts);
747 0         0 $self
748             }
749             }
750              
751              
752             ## Helpers...
753              
754             sub __opthash {
755 35 100   35   147 ref($_[0]) eq "HASH" ? shift(@_) : {}
756             }
757             sub __selfmust_opthash {
758 8     8   25 my $self = &__selfmust;
759 8         34 my $opthash = &__opthash;
760 8         28 ($self, $opthash)
761             }
762             sub __self_opthash {
763 5     5   14 my $self = &__self;
764 5         29 my $opthash = &__opthash;
765 5         20 ($self, $opthash)
766             }
767             sub __selfonly {
768 274     274   508 my $self = &__self;
769 274 50       648 confess __methname, " expects no arguments!\n" if @_;
770 274         1224 $self
771             }
772             sub __selfmustonly {
773 553     553   850 my $self = &__selfmust;
774 553 50       1248 confess __methname, " expects no arguments!\n" if @_;
775 553         2118 $self
776             }
777              
778             sub __self_opthash_Nargs($@) { # (num_expected_args, @_)
779 2     2   5 my $Nargs = shift;
780 2         5 my ($self, $opthash) = &__self_opthash;
781             #croak
782 2 50       7 croak __methname, " expects $Nargs arguments, not ",scalar(@_),"\n"
783             if $Nargs != @_;
784 2         17 ($self, $opthash, @_)
785             }
786 0     0   0 sub __self_opthash_0args { unshift @_,0; goto &__self_opthash_Nargs }
  0         0  
787 2     2   6 sub __self_opthash_1arg { unshift @_,1; goto &__self_opthash_Nargs }
  2         6  
788 0     0   0 sub __self_opthash_2args { unshift @_,2; goto &__self_opthash_Nargs }
  0         0  
789 0     0   0 sub __self_opthash_3args { unshift @_,3; goto &__self_opthash_Nargs }
  0         0  
790              
791             # Check that an option hash has only valid keys, and values aren't undef
792             sub __validate_opthash($$;@) {
793 10     10   47 my ($opthash, $valid_keys, %opts) = @_;
794 10 50       31 return unless defined $opthash; # silently accept undef
795 10         67 foreach my $k (keys %$opthash) {
796             croak "Unrecognized ",($opts{desc}//"option")," '$k'"
797 1 50 0 1   9 unless first{$_ eq $k} @$valid_keys;
  1         4  
798             confess "Option '$k' must be defined"
799             if $opts{undef_ok} && !defined($opthash->{$k})
800 1 50 33     25 && !grep{$_ eq $k} @{$opts{undef_ok}};
  0   33     0  
  0         0  
801             }
802             $opthash
803 10         26 }
804              
805             # Copy verbose/debug/silent options into $self, deleting them from
806             # the provided options hash.
807             # RETURNS: Hash of original values to pass to _restore_stdopts()
808             #
809             # This is used by methods which accept {verbose} etc. options
810             # which override what is in the object for the duration of that method call.
811             sub _set_stdopts {
812 10     10   30 my ($self, $opthash) = @_;
813 10         22 my $previous = {};
814 10         30 foreach my $key (qw/verbose debug silent/) {
815 30 100       74 if (exists $opthash->{$key}) {
816 2         7 $previous->{$key} = $$self->{$key};
817 2         8 $$self->{$key} = delete($opthash->{$key});
818             }
819             }
820             $previous
821 10         24 }
822             sub _restore_stdopts {
823 10     10   29 my $self = shift;
824 10         20 my $saved = shift;
825 10         89 @$$self{keys %$saved} = values %$saved;
826             }
827              
828             sub _validate_ident($) {
829 0 0   0   0 croak "identifier is undef!" unless defined $_[0];
830 0 0       0 croak "identifier is empty" unless $_[0] ne "";
831 0 0       0 croak ivisq '"$_[0]" is not a valid identifier\n'
832             unless $_[0] eq title2ident($_[0]);
833 0         0 $_[0]
834             }
835              
836             # Check that an option hash has only valid keys
837             sub __validate_4pthash($$;$) {
838 0     0   0 my ($opthash, $valid_keys, $optdesc) = @_;
839 0 0       0 return unless defined $opthash; # silently accept undef
840 0         0 foreach my $k (keys %$opthash) {
841             croak "Unrecognized ",($optdesc//"option")," '$k'"
842 0 0 0 0   0 unless first{$_ eq $k} @$valid_keys;
  0         0  
843             }
844             $opthash
845 0         0 }
846              
847             sub __validate_nonnegi($;$) {
848 4 50 0 4   31 croak(($_[1]//"argument")." must be a non-negative integer",
      33        
849             " (not ".u($_[0]).")")
850             unless defined($_[0]) && "$_[0]" =~ /^\d+$/;
851 4         13 $_[0]
852             }
853             sub __validate_nonnegi_or_undef($;$) {
854 0 0 0 0   0 croak(($_[1]//"argument")." must be a non-negative integer or undef",
      0        
855             " (not ".u($_[0]).")")
856             unless !defined($_[0]) || "$_[0]" =~ /^\d+$/;
857 0         0 $_[0]
858             }
859              
860             sub __validate_pairs(@) {
861 4 50   4   15 unless ((scalar(@_) % 2) == 0) {
862 0 0       0 croak __methname," does not accept an {OPTIONS} hash here"
863             if (ref($_[0]) eq "HASH");
864 0         0 confess "In call to ",__methname,
865             " : uneven arg count, expecting key => value pairs"
866             }
867 4         33 foreach (pairs @_) {
868 1         4 my $key = $_->[0];
869 1 50       7 confess "In call to ",__methname," the key '$key' looks suspicious"
870             unless $key =~ /^\w+$/;
871             }
872             @_
873 4         22 }
874              
875             sub _check_rx {
876 0     0   0 my ($self, $rx, $one_past_end_ok) = @_;
877 0 0 0     0 confess __methname.": Illegal rx ",vis($rx),"\n"
878             unless ($rx//"") =~ /^\d+$/; # non-negative integer
879 0         0 my $maxrx = $#{$$self->{rows}};
  0         0  
880 0 0       0 confess __methname.": rx ".vis($rx)." is beyond the last row\n"
    0          
881             .dvis(' $$self')
882             if $rx > ($one_past_end_ok ? ($maxrx+1) : $maxrx);
883             }
884              
885             # Diagnose scalar context if there are no results.
886             sub __first_ifnot_wantarray(@) {
887 0     0   0 my $wantarray = (caller(1))[5];
888 0 0       0 return @_ if $wantarray;
889 0 0       0 return $_[0] if @_;
890 0 0       0 croak __methname, " called in scalar context but that method does not return a result.\n"
891             if defined($wantarray);
892             }
893             sub __validate_not_scalar_context(@) {
894 0     0   0 my $wantarray = (caller(1))[5];
895 0 0 0     0 croak __methname, " returns an array, not a scalar"
896             unless $wantarray || !defined($wantarray);
897             @_
898 0         0 }
899              
900             sub _carponce { # if not silent
901 0     0   0 my $self = shift;
902 0         0 my $msg = join "",@_;
903 0 0       0 return if $$self->{_carponce}->{$msg}++;
904 0 0       0 $msg .= "\n" unless $msg =~ /\n\z/s;
905             carp($msg)
906 0 0       0 unless $$self->{silent}; # never appears even if silent is later unset
907             }
908              
909             ###################### METHODS/FUNCTIONS #######################
910              
911             # Unlike other methods, new() takes key => value pair arguments.
912             # For consistency with other methods an initial {OPTIONS} hash is
913             # also allowed, but it is not special in any way and is merged
914             # with any linear args (linear args override {OPTIONS}).
915              
916             sub new { # Strictly OO, this does not affect caller's "current sheet".
917             # The corresponding functional API is new_sheet() which explicitly
918             # creates a new sheet and makes it the 'current sheet'.
919 4     4 1 9 my $classname = shift;
920 4 50 33     30 croak "Invalid/missing CLASSNAME (i.e. \"this\") arg"
921             unless defined($classname) && $classname =~ /^[\w_:]+$/;
922              
923 4         12 my $opthash = &__opthash;
924             # Special handling of {cmd_nesting) since there was no object to begin with:
925             # Internal callers may pass this as a "user" option in {OPTARGS};
926             # we won't log it, but we plant it into the object below.
927             # **THE CALLER MUST DECREMENT IT LATER IF NEEDED*
928 4   50     42 my $cmd_nesting = delete($opthash->{cmd_nesting}) // 0;
929              
930 4         23 my %opts = (verbose => $Verbose, debug => $Debug, silent => $Silent,
931             %$opthash,
932             __validate_pairs(@_));
933            
934 4         50 my $self;
935 4 50       33 if (my $clonee = delete $opts{clone}) { # untested as of 2/12/14
936 0         0 delete @opts{qw/verbose debug silent/};
937 0 0       0 croak "Other options not allowed with 'clone': ",hvis(%opts) if %opts;
938 0         0 require Clone;
939 0         0 $self = Clone::clone($clonee); # in all its glory
940             $$self->{data_source} = (delete $opts{data_source})
941 0   0     0 // "cloned from $$self->{data_source}";
942             } else {
943             my $hash = {
944             attributes => delete $opts{attributes} // {},
945             linenums => delete $opts{linenums} // [],
946             meta_info => delete $opts{meta_info} // [], ##### ???? obsolete ???
947             data_source => delete $opts{data_source}, # // "(none)",
948             num_cols => delete $opts{num_cols}, # possibly undef
949              
950             # %colx maps titles, aliases (automatic and user-defined), and
951             # spreadsheet column lettercodes to the corresponding column indicies.
952 4   50     89 colx => {},
      50        
      50        
953             colx_desc => {}, # for use in error messages
954             useraliases => {}, # key exists for user-defined alias names
955              
956             title_rx => undef,
957             first_data_rx => undef,
958             last_data_rx => undef,
959             current_rx => undef, # valid during apply()
960              
961             pkg2tiedvarnames => {},
962             pkg2tieall => {},
963             };
964              
965             # We can not use $hash directly as the object representation because %{}
966             # is overloaded, so we use a scalar ref (pointing to the hashref)
967             # as the object.
968 4         14 $self = bless \$hash, $classname;
969              
970             # Create a tied virtual array which creates Magicrows when assigned to.
971 4         7 my @rows; tie @rows, 'Spreadsheet::Edit::RowsTie', $self;
  4         25  
972 4         8 $hash->{rows} = \@rows;
973              
974 4 100       26 if (my $newdata = delete $opts{rows}) {
975 2         6 foreach (@$newdata) {
976 6         29 push @rows, $_;
977             }
978             }
979             }# not cloning
980              
981 4         56 $$self->{cmd_nesting} = $cmd_nesting;
982              
983 4         62 $self->_set_verbose_debug_silent(%opts); # croaks if other keys remain
984              
985             # Validate data, default num_cols, pad rows, etc.
986 4         39 $self->_rows_replaced();
987              
988             log_call [$opthash,@_], [\fmt_sheet($self)]
989 4 50       16 if $$self->{verbose};
990              
991 4         45 $self
992             }#new
993              
994             use overload
995             # As an ARRAYref, a sheet acts like \@rows which is (a ref to) a
996             # virtual array of Magicrow objects, each of which is a dual array/hash
997             # ref to cells in a given row (via RowsTie).
998 0     0   0 '@{}' => sub { my $hash = ${ shift() }; $hash->{rows}; },
  0         0  
  0         0  
999              
1000             # As a HASHref, a sheet acts like \%crow which is (a ref to)
1001             # the hash view of the current row during 'apply'
1002 0     0   0 '%{}' => sub { my $self = shift;
1003             # probably less efficient but avoids repeating code
1004 0         0 \%{ $self->crow() };
  0         0  
1005             },
1006             #'""' => sub { shift },
1007             #'0+' => sub { shift },
1008             #'==' => sub { my ($self, $other, $swap) = @_; $self == $other },
1009             #'eq' => sub { my ($self, $other, $swap) = @_; "$self" eq "$other" },
1010 3         54 fallback => 1,
1011 3     3   36 ;
  3         7  
1012              
1013             sub _rows_replaced { # completely new or replaced rows, linenums, etc.
1014 6     6   16 my ($self) = @_;
1015 6         11 my $hash = $$self;
1016              
1017             my ($rows, $linenums, $num_cols, $current_rx)
1018 6         18 = @$hash{qw/rows linenums num_cols current_rx/};
1019              
1020 6 50       16 croak "Can not replace sheet content during an apply!\n"
1021             if defined $current_rx;
1022 6         24 for my $rx (0..$#$rows) {
1023 12         38 my $row = $rows->[$rx];
1024 12 50       31 croak "rows must contain refs to arrays of cells (row $rx is $row)"
1025             unless __looks_like_aref($row);
1026 12         27 for my $cx (0..$#$row) {
1027 30 50       53 croak "New cell at Row ",$rx+1," Column ",cx2let($cx)," contains a ref"
1028             if ref($row->[$cx]);
1029             }
1030             }
1031             croak '\'linenums\' if present must be (a ref to) an array of numbers, ',
1032             ' "" or "???"', ivis('\nnot $linenums\n')
1033             unless ref($linenums) eq "ARRAY"
1034 6 50 33 6   67 && all{ defined() and looks_like_number($_) || !/[^\?]/ } @$linenums;
  6 50 33     24  
1035              
1036 6 100       35 if (@$rows) { # Determine num_cols and pad short rows
1037 4         8 my $nc = 0;
1038 4 100       10 foreach (@$rows) { $nc = @$_ if @$_ > $nc }
  12         29  
1039 4 50 33     14 if ($num_cols && $num_cols != $nc) {
1040 0         0 croak "num_cols=$num_cols was specified along with initial data, but\n",
1041             "the value doesn't match the data (which has up to $nc columns)\n"
1042             } else {
1043 4         10 $hash->{num_cols} = $num_cols = $nc;
1044             }
1045             # Pad short rows with empty fields
1046 4         10 foreach my $row (@$rows) {
1047 12         25 push @$row, (("") x ($num_cols - @$row));
1048             }
1049 4         10 $#$linenums = $#$rows;
1050 4   100     12 foreach (@$linenums) { $_ //= '???' };
  12         40  
1051             } else {
1052             # There is no data. Default num_cols to zero, but leave any
1053             # user-supplied value so a subsequent insert_rows() will know how
1054             # many columns to create.
1055 2   50     8 $hash->{num_cols} //= 0;
1056             }
1057             #oops unless $hash->{data_source};
1058 6 50       15 croak "#linenums ($#$linenums) != #rows ($#$rows)\n",
1059             dvis '$hash->{linenums}\n$hash->{rows}'
1060             unless @$linenums == @$rows;
1061              
1062 6         12 $hash->{title_rx} = undef;
1063 6         10 $hash->{first_data_rx} = undef;
1064 6         11 $hash->{last_data_rx} = undef;
1065 6         14 $hash->{useraliases} = {};
1066 6         41 $self->_rebuild_colx; # Set up colx colx_desc
1067 6         13 $self
1068             }#_rows_replaced
1069              
1070             #########################################################
1071             # Combination FUNCTION/METHOD
1072             # These are declared with signatures for use as functional-API FUNCTIONs
1073             # which use the caller's "current sheet" as the implicit object.
1074             #
1075             # However they may also be called as METHODs with an explicit object.
1076             #########################################################
1077              
1078             # Allow user to find out names of tied variables
1079             sub tied_varnames(;@) {
1080 0     0 0 0 my ($self, $opts) = &__selfmust_opthash;
1081 0   0     0 my $pkg = $opts->{package} // __callerpkg();
1082 0   0     0 my $h = $$self->{pkg2tiedvarnames}->{$pkg} //= {};
1083 0         0 return keys %$h;
1084             }
1085              
1086             # Internal: Tie specified variables into a package if not already tied.
1087             # Returns:
1088             sub __TCV_REDUNDANT() { 1 } # if all were already tied
1089             sub __TCV_OK() { 2 } # otherwise (some tied, or no idents specified)
1090             #
1091             sub _tie_col_vars {
1092 3     3   10 my $self = shift;
1093 3         7 my $pkg = shift;
1094 3         6 my $parms = shift;
1095             # Remaining arguments are idents
1096              
1097 3         8 my ($safe, $file, $lno) = @$parms;
1098 3 100       20 my @safecheck_pkgs = $pkg eq "main" ? ($pkg) : ($pkg, "main");
1099              
1100             my ($colx, $colx_desc, $debug, $silent)
1101 3         14 = @$$self{qw/colx colx_desc debug silent/};
1102              
1103             # FIXME: BUG/ISSUE ...
1104             # Why is it correct to keep tiedvarnames PER-SHEET ?
1105             # Isn't this a global property of each package?
1106              
1107 3   50     31 my $tiedvarnames = ($$self->{pkg2tiedvarnames}->{$pkg} //= {});
1108              
1109 3 50 33     28 if (@_ > 0 && %$tiedvarnames) {
1110             SHORTCUT: {
1111 0         0 foreach (@_) {
  0         0  
1112 0 0       0 last SHORTCUT unless exists $tiedvarnames->{$_};
1113             }
1114 0         0 return __TCV_REDUNDANT;
1115             }
1116             }
1117              
1118             VAR:
1119 3         15 foreach (sort {$a->[0] <=> $b->[0]} # sort for ease of debugging
  26         62  
1120             map {
1121 16         26 my $cx = $colx->{$_};
1122             defined($cx)
1123 16 50       121 ? [ $cx, $_, $colx_desc->{$_} ]
1124             : [ 999999, $_, "(currently NOT DEFINED)" ];
1125             } @_
1126             )
1127             {
1128 16         149 my ($cx, $ident, $desc) = @$_;
1129 16 50       84 oops unless $ident =~ /^\w+$/;
1130              
1131 16 50       37 if (exists $tiedvarnames->{$ident}) {
1132 0 0       0 $self->_log(" Previously tied: \$${pkg}::${ident}\n") if $debug;
1133             next
1134 0         0 }
1135              
1136 16 50       32 btw dvis '##ZZ $ident $safe ${^GLOBAL_PHASE} @safecheck_pkgs\n' if $debug;
1137              
1138 3     3   4502 no strict 'refs';
  3         7  
  3         463  
1139 16 100       27 if ($safe) {
1140 8 50       25 if (${^GLOBAL_PHASE} ne "START") {
1141 0 0       0 $self->_carponce("Not tieing new variables because :safe was used and this is not (any longer) during compile time\n") unless $silent;
1142 0         0 return __TCV_REDUNDANT; ### IMMEDIATE EXIT ###
1143             }
1144 8         14 foreach my $p (@safecheck_pkgs) {
1145             # Per 'man perlref' we can not use *foo{SCALAR} to detect a never-
1146             # declared SCALAR (it's indistinguishable from an existing undef var).
1147             # So we must insist that the entire glob does not exist.
1148 16 50       28 btw ivis ' Checking pkg $p ident $ident ...\n' if $debug;
1149 3     3   24 no strict 'refs';
  3         8  
  3         570  
1150 16 50       21 if (exists ${$p.'::'}{$ident}) {
  16         57  
1151 0         0 my $msg = <<EOF ;
1152             COLSPEC '$ident' clashes with an existing object in package '$p' .
1153             This is dis-allowed when tie_column_vars was called with option :safe,
1154             in this case at ${file}:${lno} . In this situation you can not
1155             explicitly declare the tied variables, and they must be tied and
1156             imported before the compiler sees them.
1157              
1158             Note: The clash might not be with a scalar \$$ident, but something else
1159             named '${ident}' in the same package (array, hash, sub, filehandle,
1160             etc.) Unfortunately it is not possible to distinguish a non-existent
1161             scalar from a declared scalar containing an undef value
1162             (see *foo{SCALAR} in 'man perlref'). Therefore, to be safe,
1163             nothing is allowed to pre-exist with the name '${ident}'.
1164             EOF
1165             # We can detect anything other than an undef scalar
1166 0         0 local $Data::Dumper::Maxdepth = 2;
1167 0         0 local $Data::Dumper::Interp::Maxdepth = 2;
1168 0         0 my $fqname = $p."::".$ident;
1169 3     3   22 no strict 'refs';
  3         10  
  3         34226  
1170 0 0       0 if (my $r = *$fqname{SCALAR}) {
1171 0 0       0 if (defined($$r)) {
1172 0         0 $msg .= "\nExisting *Defined* Scalar \$$ident = ".vis($$r)."\n";
1173             }
1174             }
1175 0 0       0 if (my $r = *$fqname{ARRAY}) {
1176 0         0 $msg .= "\nExisting Array \@$fqname = ".avis(@$r)."\n";
1177             }
1178 0 0       0 if (my $r = *$fqname{HASH}) {
1179 0         0 $msg .= "\nExisting Hash \%$fqname = ".hvis(%$r)."\n";
1180             }
1181 0 0       0 if (my $r = *$fqname{CODE}) {
1182 0         0 $msg .= "\nExisting $fqname = ".Data::Dumper->new([$r])->Terse(1)->Indent(0)->Deparse(1)->Dump()."\n";
1183             }
1184 0 0       0 if (my $r = *$fqname{IO}) {
1185 0         0 $msg .= "\nExisting IO object $fqname (file/dir handle, etc.)\n";
1186             }
1187 0 0       0 if (eval{ require Devel::Symdump; }) {
  0         0  
1188 0         0 my $obj = Devel::Symdump->new($p);
1189 0         0 foreach (qw/FILEHANDLE.s DIRHANDLE.s UNKNOWN.s PACKAGE.s/) {
1190 0 0       0 /^(.+)\.(.*)$/ or die; my $kind = lc $1; my $methname = lc $1.$2;
  0         0  
  0         0  
1191 0 0       0 if (grep{$_ eq $ident} $obj->$methname()) {
  0         0  
1192 0         0 $msg .= "\n Found existing $kind object $fqname\n";
1193             }
1194             }
1195             }
1196 0         0 croak "\n$msg\n";
1197             }
1198             }
1199             }
1200              
1201 16 50       31 $self->_log("tie \$${pkg}::${ident} to $desc\n") if $debug;
1202              
1203 16         27 $tiedvarnames->{$ident} = 1;
1204              
1205 16         93 *{"$pkg\::$ident"} = \${ *{gensym()} };
  16         256  
  16         25  
  16         49  
1206              
1207 16         41 tie ${"${pkg}::$ident"}, 'Tie::Indirect::Scalar',
  16         80  
1208             \&_tiecell_helper, $pkg, $ident;
1209             }
1210 3         36 return __TCV_OK;
1211             }
1212             sub _tiecell_helper {
1213 2     2   134 my($mutating, $pkg, $ident) = @_;
1214 2   33     27 my $sheet = $pkg2currsheet{$pkg}
1215             // croak "No sheet is currently valid for package $pkg\n";
1216 2         21 $sheet->_onlyinapply("tied variable \$$ident");
1217              
1218             # WRONG... it croaks bc sheet->{rows}->[rx] is a rowhash which doesn't like undef keys
1219             # WRONG: This returns \undef if $ident is not currently valid
1220 2         13 \( $$sheet->{rows}->[$$sheet->{current_rx}]->{$ident} )
1221             }
1222              
1223             sub _all_valid_idents {
1224 3     3   5 my $self = shift;
1225 3         7 my %valid_idents;
1226 3         6 foreach (keys %{ $$self->{colx} }) {
  3         18  
1227 18 50       51 if (/^(?:REGERROR|REGMARK|AUTOLOAD)$/) {
1228 0         0 $self->_carponce("WARNING: Column key ",visq($_)," conflicts with a Perl built-in; variable will not be tied.\n");
1229 0         0 next;
1230             }
1231 18         33 $valid_idents{ title2ident($_) } = 1;
1232             }
1233 3         27 return keys %valid_idents;
1234             }
1235              
1236             sub tie_column_vars(;@) {
1237 3     3 1 20 my ($self, $opts) = &__self_opthash;
1238             # Any remaining args specify variable names matching
1239             # alias names, either user-defined or automatic.
1240              
1241 3 50       18 croak "tie_column_vars without arguments (did you intend to use ':all'?)"
1242             unless @_;
1243              
1244 3   33     37 local $$self->{silent} = $opts->{silent} // $$self->{silent};
1245 3   33     23 local $$self->{verbose} = $opts->{verbose} // $$self->{verbose};
1246 3   33     21 local $$self->{debug} = $opts->{debug} // $$self->{debug};
1247              
1248 3   66     12 my $pkg = $opts->{package} // __callerpkg();
1249              
1250 3         6 my (%tokens, @varnames);
1251 3 50       10 foreach (@_) { if (/:/) { $tokens{$_} = 1 } else { push @varnames, $_ } }
  5         23  
  5         14  
  0         0  
1252 3         11 foreach (@varnames) {
1253 0 0       0 croak "Invalid variable name '$_'\n" unless /^\$?\w+$/;
1254 0         0 s/^\$//;
1255             }
1256              
1257             # With ':all' tie all possible variables, now and in the future.
1258             #
1259             # CURRENTLY UNDOCUMENTED: With the ":safe" token, a check is made
1260             # that variables do not already exist immediately before tying them;
1261             # otherwise an exception is thrown.
1262             #
1263             # When ':safe' is combined with ':all', variables will not be checked & tied
1264             # except during compile time, i.e. within BEGIN{...}. Therefore a
1265             # malicious spreadsheet can not cause an exception after the compilation
1266             # phase.
1267 3         7 my $safe = delete $tokens{':safe'};
1268 3         11 my ($file, $lno) = __fn_ln_methname();
1269 3         12 my $parms = [$safe, $file, $lno];
1270              
1271             # Why? Obsolete? Only for :all?? [note added Dec22]
1272             # FIXME TODO: is this bogus?
1273 3 50       15 $self->title_rx($opts->{title_rx}) if exists $opts->{title_rx};
1274              
1275 3 50       28 if (delete $tokens{':all'}) {
1276             # Remember parameters for tie operations which might occur later
1277 3         21 $$self->{pkg2tieall}->{$pkg} = $parms;
1278              
1279 3         14 push @varnames, sort $self->_all_valid_idents;
1280             }
1281 3 50       13 croak "Unrecognized token in arguments: ",avis(keys %tokens) if %tokens;
1282              
1283 3         38 my $r = $self->_tie_col_vars($pkg, $parms, @varnames);
1284              
1285 3 50       13 my $pfx = ($r == __TCV_REDUNDANT ? "[ALL REDUNDANT] " : "");
1286             log_methcall $self, [\$pfx, \__fmt_uqarraywithqw(keys %tokens, @varnames),
1287             \" in package $pkg"]
1288 3 50       23 if $$self->{verbose};
1289             }#tie_column_vars
1290              
1291             #
1292             # Accessors for misc. sheet data
1293             #
1294              
1295 274     274 1 392 sub attributes(@) { ${&__selfonly}->{attributes} }
  274         498  
1296 0     0 1 0 sub colx() { ${&__selfmustonly}->{colx} }
  0         0  
1297 0     0 1 0 sub colx_desc() { ${&__selfmustonly}->{colx_desc} }
  0         0  
1298             sub data_source(;$) {
1299 274     274 1 507 my $self = &__selfmust;
1300 274 50       658 if (@_ == 0) { # 'get' request
1301 274 50       629 log_methcall $self, [], [$$self->{data_source}] if $$self->{verbose};
1302             return $$self->{data_source}
1303 274         1065 }
1304 0 0       0 log_methcall $self, [@_] if $$self->{verbose};
1305 0 0       0 croak "Too many args" unless @_ == 1;
1306 0         0 $$self->{data_source} = $_[0];
1307 0         0 $self
1308             }
1309 0     0 1 0 sub linenums() { ${&__selfmustonly}->{linenums} }
  0         0  
1310 0     0 1 0 sub num_cols() { ${&__selfmustonly}->{num_cols} }
  0         0  
1311 161     161 1 254 sub rows() { ${&__selfmustonly}->{rows} }
  161         292  
1312 274     274 1 392 sub sheetname() { ${&__selfmustonly}->{sheetname} }
  274         459  
1313              
1314 0     0 0 0 sub iolayers() { ${&__selfmustonly}->{iolayers} }
  0         0  
1315 0     0 0 0 sub meta_info() {${&__selfmustonly}->{meta_info} }
  0         0  
1316             sub input_encoding() {
1317             # Emulate old API. We actually store input_iolayers instead now,
1318             # so as to include :crlf if necessary.
1319 0     0 0 0 my $self = &__selfmustonly;
1320 0         0 local $_;
1321             return undef unless
1322 0         0 exists(${$self}->{input_iolayers})
1323 0 0 0     0 && ${$self}->{input_iolayers} =~ /encoding\(([^()]*)\)/;
  0         0  
1324 0         0 return $1;
1325             }
1326              
1327             # See below for title_rx()
1328             sub title_row() {
1329 0     0 1 0 my $self = &__selfmustonly;
1330 0         0 my $title_rx = $$self->{title_rx};
1331 0 0       0 my $r = defined($title_rx) ? $$self->{rows}->[$title_rx] : undef;
1332 0 0       0 log_methcall $self, [], [$r] if $$self->{verbose};
1333 0         0 $r
1334             }
1335 118     118 1 199 sub rx() { ${ &__selfmustonly }->{current_rx} }
  118         226  
1336             sub crow() {
1337 0     0 1 0 my $self = &__selfmustonly;
1338 0         0 ${ $self->_onlyinapply("crow() method") }->{rows}->[$$self->{current_rx}]
  0         0  
1339             }
1340             sub linenum() {
1341 0     0 1 0 my $self = &__selfmustonly;
1342 0   0     0 my $current_rx = $$self->{current_rx} // return(undef);
1343 0         0 $$self->{linenums}->[$current_rx];
1344             }
1345             sub _getref {
1346 0     0   0 my ($self, $rx, $ident) = @_;
1347 0         0 my ($rows, $colx) = @$$self{qw/rows colx/};
1348 0 0 0     0 croak "get/set: rx $rx is out of range" if $rx < 0 || $rx > $#$rows;
1349 0         0 my $row = $$self->{rows}->[$rx];
1350 0         0 my $cx = $colx->{$ident};
1351 0 0 0     0 oops("Invalid cx ".vis($cx)) if ! defined($cx) || $cx < 0 || $cx > $#$row;
      0        
1352 0         0 \$row->[$cx];
1353             }
1354             # get/set a cell given by (rx,COLSPEC)
1355             sub get($$) {
1356 0     0 1 0 my $self = &__selfmust;
1357 0         0 my $ref = $self->_getref(@_);
1358 0         0 $$ref;
1359             }
1360             sub set($$$) {
1361 0     0 1 0 my $self = &__selfmust;
1362 0         0 my ($rx, $colspec, $newval) = @_;
1363 0         0 my $ref = $self->_getref($rx, $colspec);
1364 0         0 $$ref = $newval;
1365 0         0 $self
1366             }
1367              
1368             sub __validate_sheet_arg($) {
1369 12     12   22 my $sheet = shift;
1370 12 50 33     118 croak "Argument '${\u($sheet)}' is not a Spreadsheet::Edit sheet object"
  0   66     0  
1371             if defined($sheet) and
1372             !blessed($sheet) || !$sheet->isa("Spreadsheet::Edit");
1373 12         32 $sheet;
1374             }
1375              
1376             #---- logging ----------------------------------
1377              
1378             # Print segmented log messages:
1379             # Join args together, prefixing with "> " or ">> " etc.
1380             # unless the previous call did not end with newline.
1381             # Maintains internal state. A final call with an ending \n must occur.
1382             sub _log {
1383 0     0   0 my $self = shift;
1384 0         0 state $in_midst;
1385             print STDERR join "",
1386             ($in_midst ? "" : (">" x ($$self->{cmd_nesting}||1))),
1387 0 0 0     0 map{u} @_;
  0         0  
1388 0         0 $in_midst = ($_[$#_] !~ /\n\z/s);
1389             }
1390              
1391             my $trunclen = 200;
1392             sub fmt_sheet($) {
1393 0     0 0 0 my $sheet = shift;
1394 0 0       0 return "undef" unless defined($sheet);
1395             #oops unless ref($sheet) ne "" && $sheet->isa(__PACKAGE__);
1396 0         0 local $$sheet->{verbose} = 0;
1397 0   0     0 my $desc = $sheet->sheetname() || $sheet->data_source() || "no data_source";
1398 0 0       0 if (length($desc) > $trunclen) { $desc = substr($desc,($trunclen-3))."..." }
  0         0  
1399 0         0 my $r = addrvis($sheet)."($desc)";
1400 0         0 $r
1401             }
1402              
1403             sub _logmethifv { # args: just linearized INPUT items
1404 0 0   0   0 return unless ${$_[0]}->{verbose};
  0         0  
1405 0         0 my $self = shift;
1406 0         0 log_methcall $self, [@_];
1407             }
1408              
1409             sub _logmethretifv { # args: [INPUTS], [OUTPUTS]
1410 8 50   8   31 oops if @_ > 3; # (self, items, retvals)
1411 8 50       14 return unless ${$_[0]}->{verbose};
  8         33  
1412 0         0 goto &log_methcall
1413             }
1414             #-----------------------------------------------
1415              
1416             sub _call_usercode($$$) {
1417 6     6   17 my ($self, $code, $cxlist) = @_;
1418              
1419 6 50       18 if (@$cxlist) {
1420 0         0 my $row = $self->crow();
1421 0         0 foreach ($row->[$cxlist->[0]]) { # bind $_ to the first-specified column
1422 0         0 &$code(@$row[@$cxlist]);
1423             }
1424             } else {
1425 6         35 $code->();
1426             ##Simplify backtraces
1427             #@_ = ();
1428             #goto &$code;
1429             }
1430             }
1431              
1432             # Do apply, handling COLSPEC args.
1433             # If $rxlists or $rxfirst & $rxlast are undef, visit all rows.
1434             sub _apply_to_rows($$$;$$$) {
1435 6     6   29 my ($self, $code, $cxlist, $rxlist, $rxfirst, $rxlast) = @_;
1436 6         16 my $hash = $$self;
1437 6         30 my ($linenums,$rows,$num_cols,$cl) = @$hash{qw/linenums rows num_cols/};
1438              
1439 6 50       29 croak $self->_methmsg("Missing or incorrect {code} argument")
1440             unless ref($code) eq "CODE";
1441 6         25 foreach (@$cxlist) {
1442 0 0 0     0 if ($_ < 0 || $_ >= $num_cols) {
1443 0         0 croak $self->_methmsg("cx $_ is out of range")
1444             }
1445             }
1446              
1447             { # Temp save "current_rx" from an enclosing apply
1448 6         16 local $hash->{current_rx} = undef;
  6         25  
1449              
1450             # Temp update "current apply sheet" for logmsg()
1451 6         28 local $_inner_apply_sheet = $self;
1452              
1453 6 100       48 if (defined $rxlist) {
1454 5         26 foreach my $rx (@$rxlist) {
1455 5 50 33     52 croak "rx $rx is out of range"
1456             if $rx < 0 || $rx > $#$rows;
1457 5         21 $hash->{current_rx} = $rx;
1458 5         44 _call_usercode($self,$code,$cxlist);
1459             }
1460             } else {
1461             # Do not cache $#$rows so user can call insert_rows() or delete_rows()
1462 1   50     10 for (my $rx = $rxfirst // 0;
      33        
      66        
1463             $rx <= $#$rows && (!defined($rxlast) || $rx <= $rxlast);
1464             $rx++)
1465             {
1466 1         7 $hash->{current_rx} = $rx;
1467 1         8 _call_usercode($self,$code,$cxlist);
1468 1         464 $rx = $hash->{current_rx}; # might have been changed by delete_rows()
1469             }
1470             }
1471             }
1472              
1473             croak "After completing apply, an enclosing apply was resumed, but",
1474             " current_rx=",$hash->{current_rx}," now points beyond the last row!\n"
1475 6 50 33     998 if defined($hash->{current_rx}) && $hash->{current_rx} > $#$rows;
1476 6         36 $self
1477             }#_apply_to_rows
1478              
1479             # Rebuild %colx and %colx_desc, and tie any required new variables.
1480             #
1481             # User-defined column aliases must already be valid in %colx;
1482             # all other entries are deleted and re-created.
1483             #
1484             # Note: the special '^', '$' and numieric cx values (if in 0..num_cols)
1485             # are handled algorithmically in _specs2cxdesclist() before consulting %colx.
1486             #
1487             # When building %colx, conflicts are resolved using these priorities:
1488             #
1489             # User-defined aliases (ALWAYS valid)
1490             # Titles (if unique)
1491             # Trimmed titles (with leading & trailing spaces removed)
1492             # Automatic aliases
1493             # ABC letter-codes
1494             #
1495             # Warnings are issued once for each conflict.
1496             sub _rebuild_colx {
1497             my $self = shift;
1498             my $notie = $_[0]; # true during autodetect probing
1499              
1500             my ($silent, $colx, $colx_desc, $useraliases, $num_cols, $title_rx,
1501             $rows, $debug, $pkg2tieall)
1502             = @$$self{qw/silent colx colx_desc useraliases num_cols title_rx
1503             rows debug pkg2tieall/};
1504              
1505             # Save user-defined Aliases before wiping %colx
1506             my %useralias;
1507             foreach my $alias (keys %$useraliases) {
1508             my $cx = $colx->{$alias};
1509             # cx may be undef if referenced column was deleted and/or if
1510             # an alias was created with {optional => TRUE} with non-matching regex.
1511             #next if !defined($cx); # the referenced column was deleted
1512             $useralias{$alias} = [$cx, $colx_desc->{$alias}];
1513             }
1514              
1515             # Now re-generate
1516             %$colx = ();
1517             %$colx_desc = ();
1518              
1519             my sub __putback($$$;$) {
1520             my ($key, $cx, $desc, $nomasking) = @_;
1521             if (defined (my $ocx = $colx->{$key})) {
1522             if ($cx != $ocx) {
1523             oops if $nomasking; # _get_usable_titles should have screen out
1524             $self->_carponce("Warning: ", visq($key), " ($desc) is MASKED BY (",
1525             $colx_desc->{$key}, ")") unless $silent;
1526             }
1527             } else {
1528             oops if exists $colx->{$key};
1529             $colx->{$key} = $cx;
1530             $colx_desc->{$key} = $desc;
1531             }
1532             }
1533              
1534             # Put back the user aliases
1535             while (my ($alias,$aref) = each %useralias) {
1536             my ($cx, $desc) = @$aref;
1537             __putback($alias, $cx, $desc);
1538             }
1539              
1540             if (defined $title_rx) {
1541             # Add non-conflicting titles
1542             my ($normal_titles, $unindexed_titles) = $self->_get_usable_titles;
1543             foreach (@$normal_titles) {
1544             my ($title, $cx) = @$_;
1545             __putback($title, $cx, __fmt_cx($cx).": Title", 1); # nomasking==1
1546             }
1547             # Titles with leading & trailing spaces trimmed off
1548             foreach (@$normal_titles) {
1549             my ($title, $cx) = @$_;
1550             my $key = $title;
1551             $key =~ s/\A\s+//s; $key =~ s/\s+\z//s;
1552             if ($key ne $title) {
1553             __putback($key, $cx, __fmt_cx($cx).": Title sans lead/trailing spaces",1);
1554             }
1555             }
1556             # Automatic aliases
1557             # N.B. These come from all titles, not just "normal" ones
1558             foreach (@$normal_titles, @$unindexed_titles) {
1559             my ($title, $cx) = @$_;
1560             my $ident = title2ident($title);
1561             __putback($ident, $cx, __fmt_cx($cx).": Automatic alias for title");
1562             }
1563             }
1564             my %abc;
1565             foreach my $cx ( 0..$num_cols-1 ) {
1566             my $ABC = cx2let($cx);
1567             __putback($ABC, $cx, "cx $cx: Standard letter-code");
1568             }
1569              
1570             unless ($notie) {
1571             # export and tie newly-defined magic variables to packages which want that.
1572             if (my @pkglist = grep {defined $pkg2tieall->{$_}} keys %$pkg2tieall) {
1573             my @idents = $self->_all_valid_idents;
1574             foreach my $pkg (@pkglist) {
1575             $self->_tie_col_vars($pkg, $pkg2tieall->{$pkg}, @idents);
1576             }
1577             }
1578             }
1579             } # _rebuild_colx
1580              
1581             # Move and/or delete column positions. The argument is a ref to an array
1582             # containing the old column indicies of current (i.e. surviving) columns,
1583             # or undefs for new columns which did not exist previously.
1584             sub _adjust_colx {
1585 0     0   0 my ($self, $old_colxs) = @_;
1586             my ($colx, $colx_desc, $num_cols, $useraliases, $debug)
1587 0         0 = @$$self{qw/colx colx_desc num_cols useraliases debug/};
1588 0 0       0 oops unless @$old_colxs == $num_cols;
1589 0         0 my %old2new;
1590 0         0 foreach my $new_cx (0..$#$old_colxs) {
1591 0         0 my $old_cx = $old_colxs->[$new_cx];
1592 0 0       0 $old2new{$old_cx} = $new_cx if defined $old_cx;
1593             }
1594             # User-defined aliases are for arbitrary columns, so fix them manually
1595 0         0 foreach my $alias (keys %$useraliases) {
1596 0         0 my $cx = $colx->{$alias};
1597 0 0       0 next unless defined $cx; # e.g. non-unique title; see _rebuild_colx()
1598 0 0       0 if (defined (my $new_cx = $old2new{$cx})) {
1599 0 0       0 warn ">adjusting colx{$alias} : $colx->{$alias} -> $new_cx\n" if $debug;
1600 0         0 $colx->{$alias} = $new_cx;
1601             } else {
1602 0 0       0 warn ">deleting colx{$alias} (was $colx->{$alias})\n" if $debug;
1603 0         0 delete $colx->{$alias};
1604 0         0 delete $colx_desc->{$alias};
1605 0         0 delete $useraliases->{$alias};
1606             }
1607             }
1608             # Everything else is derived from actual titles
1609 0         0 $self->_rebuild_colx();
1610             }
1611              
1612             # Translate list of COLSPECs to a list of [cx,desc].
1613             # Regexes may match multiple columns.
1614             # THROWS if a spec does not indicate any existing column.
1615             sub _specs2cxdesclist {
1616 0     0   0 my $self = shift;
1617 0         0 my ($colx, $colx_desc, $num_cols) = @$$self{qw/colx colx_desc num_cols/};
1618 0         0 my @results;
1619 0         0 foreach my $spec (@_) {
1620 0 0       0 croak "Column specifier is undef!" unless defined $spec;
1621 0 0       0 if ($spec eq '^') {
1622 0         0 push @results, [0, "Special '^' specifier for first col"];
1623             next
1624 0         0 }
1625 0 0       0 if ($spec eq '$') {
1626 0         0 push @results, [$num_cols-1, "Special '\$' specifier for last col"];
1627             next
1628 0         0 }
1629 0 0 0     0 if (($spec =~ /^[1-9]\d*$/ || $spec eq "0")
      0        
1630             && $spec <= $num_cols) { # allow one-past-end
1631 0         0 push @results, [$spec, "Numeric column-index"];
1632             next
1633 0         0 }
1634 0 0       0 if (defined (my $cx = $colx->{$spec})) {
1635 0         0 push @results, [$cx, $colx_desc->{$spec}];
1636             next
1637 0         0 }
1638 0 0       0 if (ref($spec) eq 'Regexp') {
1639 0         0 my ($title_rx, $rows) = @$$self{qw/title_rx rows/};
1640 0 0       0 croak "Can not use regex: No title-row is defined!\n"
1641             unless defined $title_rx;
1642 0   0     0 my $title_row = $rows->[$title_rx] // oops;
1643 0         0 my $matched;
1644 0         0 for my $cx (0..$#$title_row) {
1645 0         0 my $title = $title_row->[$cx];
1646             # Note: We can't use /s here! The regex compiler has already
1647             # encapsulated /s or lack thereof in the compiled regex
1648 0 0       0 if ($title =~ /$spec/) {
1649 0         0 push @results, [$cx, "cx $cx: regex matched title '$title'"];
1650 0         0 $matched++;
1651             }
1652             }
1653 0 0       0 if (! $matched) {
1654 0         0 croak "\n--- Title Row (rx $title_rx) ---\n",
1655             vis($title_row),"\n-----------------\n",
1656             "Regex $spec\n",
1657             "does not match any of the titles (see above) in '",
1658             fmt_sheet($self),"'\n"
1659             # N.B. check for "does not match" in alias()
1660             }
1661             next
1662 0         0 }
1663 0         0 croak "Invalid column specifier '${spec}'\nnum_cols=$num_cols. Valid keys are:\n",
1664             $self->_fmt_colx;
1665             }
1666 0 0       0 oops unless wantarray;
1667             @results
1668 0         0 }#_specs2cxdesclist
1669             sub _spec2cx { # return $cx or ($cx, $desc); throws if spec is invalid
1670 0     0   0 my ($self, $spec) = @_;
1671 0         0 my @list = $self->_specs2cxdesclist($spec);
1672 0 0       0 if (@list > 1) {
1673             croak ivis("Regexpr $spec matches multiple titles:\n "),
1674 0         0 join("\n ",map{ vis $_->[1] } @list), "\n";
  0         0  
1675             }
1676 0         0 __first_ifnot_wantarray( @{$list[0]} ) # cx or (cx,desc)
  0         0  
1677             }
1678              
1679             sub _colspec2cx {
1680 0     0   0 my ($self, $colspec) = @_;
1681 0 0       0 croak "COLSPEC may not be a regex" if ref($colspec) eq 'Regexp';
1682 0         0 goto &_spec2cx
1683             }
1684              
1685             # The user-callable API
1686             # THROWS if a spec does not indicate any existing column.
1687             # Can return multiple results due to multple args and/or Regexp multimatch
1688             # In scalar context returns the first result.
1689             sub spectocx(@) { # the user-callable API
1690 0     0 1 0 my $self = &__selfmust;
1691 0         0 my @list = $self->_specs2cxdesclist(@_);
1692 0         0 __first_ifnot_wantarray( map{ $_->[0] } @list )
  0         0  
1693             }
1694              
1695             # Translate a possibly-relative column specification which
1696             # indicate 1 off the end.
1697             #
1698             # The specification may be
1699             # >something (the column after 'something')
1700             # or
1701             # an absolute column indicator (cx or ABC), possibly 1 off the end
1702             # or
1703             # refer to an existing column
1704             #
1705             sub _relspec2cx {
1706 0     0   0 my ($self, $spec) = @_;
1707 0         0 my $colx = $$self->{colx};
1708 0 0       0 if ($spec =~ /^>(.*)/) {
1709 0         0 my $cx = $self->_colspec2cx($1); # croaks if not an existing column
1710 0         0 return $cx + 1
1711             }
1712 0         0 $self->_colspec2cx($spec); # croaks if not an existing column
1713             }
1714              
1715             sub alias(@) {
1716 0     0 1 0 my $self = &__selfmust;
1717 0 0       0 my $opthash = ref($_[0]) eq 'HASH' ? shift() : {};
1718 0 0       0 if ($opthash) {
1719 0         0 __validate_opthash($opthash, [qw(optional)],
1720             desc => "alias option");
1721             }
1722 0 0       0 croak "'alias' expects an even number of arguments\n"
1723             unless scalar(@_ % 2)==0;
1724              
1725             my ($colx, $colx_desc, $num_cols, $useraliases, $rows, $silent, $debug)
1726 0         0 = @$$self{qw/colx colx_desc num_cols useraliases rows silent debug/};
1727              
1728 0         0 my ($file, $lno) = __fn_ln_methname();
1729              
1730 0         0 my @cxlist;
1731 0         0 while (@_) {
1732 0         0 my $ident = _validate_ident( shift @_ );
1733 0         0 my $spec = shift @_;
1734              
1735 0 0       0 if (my $wheredefined = $useraliases->{$ident}) {
1736 0         0 croak "'$ident' is already a user-defined alias",
1737             " for cx ", scalar($self->_spec2cx($ident)),
1738             " defined at ", $wheredefined, "\n"
1739             #" (for cx ", scalar($self->_spec2cx($ident)), ")"
1740             }
1741             croak "'$ident' is already a user-defined alias (for cx ",
1742             scalar($self->_spec2cx($ident)), ")"
1743 0 0       0 if $useraliases->{$ident};
1744              
1745 0         0 my $cx = eval{ $self->_spec2cx($spec) };
  0         0  
1746 0 0       0 unless(defined $cx) {
1747 0 0       0 oops unless $@;
1748 0 0 0     0 croak $@ unless $opthash->{optional} && $@ =~ /does not match/is;
1749             # Always throw on other errors, e.g. regex matches more than one title
1750             };
1751              
1752             # Log each pair individually
1753 0         0 $self->_logmethifv($opthash, \", $ident => ",\__fmt_colspec_cx($spec,$cx));
1754              
1755 0         0 $colx->{$ident} = $cx;
1756 0         0 $colx_desc->{$ident} = "alias for ".__fmt_cx($cx)." (".quotekey($spec).")";
1757 0         0 $useraliases->{$ident} = "${file}:$lno";
1758 0         0 push @cxlist, $cx;
1759             }
1760 0         0 $self->_rebuild_colx();
1761              
1762 0         0 __first_ifnot_wantarray( @cxlist )
1763             }#alias
1764              
1765             sub unalias(@) {
1766 0     0 1 0 my $self = &__selfmust;
1767 0 0       0 croak __methname, " does not accept an {OPTIONS} hash\n"
1768             if ref($_[0]) eq 'HASH';
1769              
1770             my ($colx, $colx_desc, $useraliases)
1771 0         0 = @$$self{qw/colx colx_desc useraliases/};
1772              
1773 0         0 foreach (@_) {
1774 0   0     0 delete $useraliases->{$_} // croak "unalias: '$_' is not a column alias\n";
1775 0         0 $self->_logmethifv(\" Removing alias $_ => ", \$colx_desc->{$_});
1776 0   0     0 delete $colx->{$_} // oops;
1777 0   0     0 delete $colx_desc->{$_} // oops;
1778             }
1779 0         0 $self->_rebuild_colx();
1780 0         0 $self
1781             }
1782              
1783             # title_rx: Get/set the title row index
1784             #
1785             # $rx = title_rx ; # Retrieve
1786             #
1787             # title_rx undef # Set to no titles
1788             #
1789             # title_rx ROWINDEX # Set to specified rx
1790             #
1791             # title_rx 'auto' # Auto-detect the title row; an exception is thrown
1792             # # if a plausible row is not found.
1793             #
1794             # {OPTARGS} may contain
1795             # verbose, silent, debug (temporarily override the object's settings)
1796             #
1797             # Auto-detect options:
1798             # required => [COLSPEC, ...] # required titles
1799             # min_rx, max_rx => NUM # range of rows which may contain the row
1800             # first_cx => NUM # first column ix which must contain a valid title
1801             # last_cx => NUM # last column ix which must contain a valid title
1802             #
1803             # Note: This is called internally by read_spreadsheet(), passing 'auto'
1804             # by default. Therefore users need not call this method explicitly
1805             # except to change title row or if read_spreadsheet was not used at all.
1806             #
1807             sub title_rx(;$@) {
1808 8     8 1 45 my ($self, $opthash_arg) = &__selfmust_opthash;
1809 8         28 my $opthash = { %$opthash_arg }; # make copy so we can modify it
1810 8         33 my @orig_args = @_;
1811              
1812 8         43 my $saved_stdopts = $self->_set_stdopts($opthash);
1813 8     8   81 scope_guard{ $self->_restore_stdopts($saved_stdopts) };
  8         37  
1814 8         61 __validate_opthash( $opthash,
1815             [qw(required min_rx max_rx first_cx last_cx)],
1816             desc => "autodetect option",
1817             undef_ok => [] );
1818 8         20 my $rx = -999;
1819 8 100       41 if (@_ == 0) {
1820             # A return value was requested
1821 2 50       13 croak '{OPTARGS} passed to title_rx with no operator (get request?)'
1822             if %$opthash;
1823 2         19 $rx = $$self->{title_rx};
1824 2         154 $self->_logmethretifv([], [$rx]);
1825             } else {
1826             # N.B. undef arg means there are no titles
1827 6         14 $rx = shift;
1828 6 100       11 my $notie; $notie = shift() if u($_[0]) eq "_notie"; # during auto-detect probes
  6         29  
1829 6 50       71 croak "Extraneous argument(s) to title_rx: ".avis(@_) if @_;
1830              
1831 6 50       18 if (defined $rx) {
1832 6 100       62 if ($rx eq 'auto') {
    50          
    50          
1833 1         6 $rx = $self->_autodetect_title_rx($opthash);
1834             }
1835             elsif ($rx !~ /^\d+$/) {
1836 0         0 croak "Invalid title_rx argument: ", visq($rx);
1837             }
1838 5         26 elsif ($rx > $#{ $$self->{rows} }) {
1839 0         0 croak "Rx $rx is beyond the end of the data", visq($rx);
1840             }
1841             }
1842 6         18 $$self->{title_rx} = $rx;
1843 6         42 $self->_logmethretifv([$opthash_arg, @orig_args], [$rx]);
1844 6         40 $self->_rebuild_colx($notie);
1845             }
1846 8         40 $rx;
1847             }#title_rx
1848              
1849             sub _autodetect_title_rx {
1850 1     1   4 my ($self, $opthash) = @_;
1851              
1852             my ($title_rx, $rows, $colx, $num_cols, $verbose, $debug) =
1853 1         7 @$$self{qw(title_rx rows colx num_cols verbose debug)};
1854              
1855 1 50       4 if ($#$rows == -1) {
1856 0         0 return undef; # completely empty
1857             }
1858              
1859             # Filter out titles which can not be used as a COLSPEC
1860             my @required_specs = $opthash->{required}
1861 1 50       12 ? to_array($opthash->{required}) : ();
1862 1 50       12 croak "undef value in {required}" if grep{! defined} @required_specs;
  0         0  
1863 1         4 @required_specs = grep{ !__unindexed_title($_, $num_cols) } @required_specs;
  0         0  
1864              
1865 1   50     7 my $min_rx = __validate_nonnegi($opthash->{min_rx}//0, "min_rx");
1866 1   33     11 my $max_rx = __validate_nonnegi($opthash->{max_rx}//$min_rx+3, "max_rx");
1867 1 50       10 $max_rx = $#$rows if $max_rx > $#$rows;
1868              
1869 1   50     6 my $first_cx = __validate_nonnegi($opthash->{first_cx}//0, "first_cx");
1870 1   50     12 my $last_cx = __validate_nonnegi($opthash->{last_cx}//INT_MAX, "last_cx");
1871 1 50       8 $last_cx = $num_cols-1 if $last_cx >= $num_cols;
1872              
1873 1         9 my @nd_reasons;
1874 1 50       5 if ($min_rx > $#$rows) {
    50          
1875 0         0 push @nd_reasons, "min_rx ($min_rx) is out of range";
1876             }
1877             elsif ($min_rx > $max_rx) {
1878 0         0 push @nd_reasons,
1879             "min_rx ($min_rx) is greater than max_rx ($max_rx)"
1880             }
1881 1 50       11 if ($first_cx >= $num_cols) {
    50          
1882 0         0 push @nd_reasons, "first_cx ($first_cx) is out of range"
1883             }
1884             elsif ($first_cx > $last_cx) {
1885 0         0 push @nd_reasons,
1886             "first_cx ($first_cx) is less than last_cx ($last_cx)"
1887             }
1888              
1889 1         7 my $detected;
1890 1 50       6 unless (@nd_reasons) {
1891             # no logging during trial and error (except with debug)
1892 1         4 local $$self->{verbose} = $debug;
1893 1         3 local $$self->{silent} = !$debug;
1894 1         4 RX: for my $rx ($min_rx .. $max_rx) {
1895 1 50 33     4 warn "# ",$nd_reasons[-1],"\n" if $debug && @nd_reasons;
1896 1 50       4 warn ivis '#autodetect: Trying RX $rx ...\n' if $debug;
1897              
1898             # Make $rx the title_rx so __specs2cxdesclist() can be used
1899             # e.g. to handle regex COLSPECS. Pass special option to not tie
1900             # user variables yet.
1901 1         9 $self->title_rx($rx, "_notie");
1902 1 50       6 oops unless $rx == $$self->{title_rx};
1903              
1904 1         5 my $row = $rows->[$rx];
1905 1         3 for my $cx ($first_cx .. $last_cx) {
1906 2 50       18 if ($row->[$cx] eq "") {
1907 0         0 push @nd_reasons, "rx $rx: col ".__fmt_cx($cx)." is empty";
1908 0         0 next RX;
1909             }
1910             }
1911 1         8 foreach my $spec (@required_specs) {
1912 0         0 my @list; # A regex might match multiple titles
1913 0         0 eval { @list = $self->_specs2cxdesclist($spec) };
  0         0  
1914 0 0       0 warn ivis ' found $spec in @list\n' if $debug;
1915 0 0       0 if (@list == 0) {
1916 0         0 push @nd_reasons, ivis 'rx $rx: Required column \'$spec\' not found';
1917             next RX
1918 0         0 }
1919 0 0       0 my @shortlist = grep{ $_->[0] >= $first_cx && $_->[0] <= $last_cx }
  0         0  
1920             @list;
1921 0 0       0 if (@shortlist == 0) {
1922 0         0 push @nd_reasons, ivis 'rx $rx: Matched \'$spec\' but in unacceptable cx '.avisl(map{$_->[0]} @list);
  0         0  
1923             next RX
1924 0         0 }
1925 0 0       0 if (! grep{ $_->[1] =~ /title/i } @shortlist) {
  0         0  
1926             ### ??? Can this actually happen ???
1927 0         0 push @nd_reasons, ivis 'rx $rx: \'$spec\' resolved to something other than a title: '.__fmt_pairs(@shortlist);
1928 0         0 next RX;
1929             }
1930 0 0       0 warn ivis ' <<cx is within $first_cx .. $last_cx>>\n' if $debug;
1931             }
1932 1         3 $detected = $rx;
1933             last
1934 1         3 }
1935 1         4 $$self->{title_rx} = undef; # will re-do below
1936             }
1937 1 50       4 if (defined $detected) {
1938 1 50       26 if ($verbose) { # should be $debug ??
1939 0         0 my ($fn, $lno, $methname) = __fn_ln_methname();
1940 0         0 print STDERR "[Auto-detected title_rx = $detected at ${fn}:$lno]\n";
1941             }
1942 1         9 local $$self->{cmd_nesting} = $$self->{cmd_nesting} + 1;
1943 1         4 local $$self->{verbose} = 0; # suppress normal logging
1944 1         4 $self->title_rx($detected); # shows collision warnings unless {silent}
1945 1         6 return $detected;
1946             } else {
1947 0 0       0 if (@nd_reasons == 0) {
1948 0         0 push @nd_reasons, ivis '(BUG?) No rows checked! num_cols=$num_cols rows=$$self->{rows}'.dvis '\n##($min_rx $max_rx $first_cx $last_cx)' ;
1949             }
1950 0         0 croak("In ",qsh(fmt_sheet($self))," ...\n",
1951             " Auto-detect of title_rx with options ",vis($opthash),
1952             dvis ' @required_specs\n',
1953             " failed because:\n ", join("\n ",@nd_reasons),
1954             "\n"
1955             );
1956             }
1957             }
1958              
1959             sub first_data_rx(;$) {
1960 0     0 1 0 my $self = &__self;
1961 0         0 my $first_data_rx = $$self->{first_data_rx};
1962 0 0       0 if (@_ == 0) { # 'get' request
1963 0         0 $self->_logmethretifv([], [$first_data_rx]);
1964 0         0 return $first_data_rx;
1965             }
1966 0         0 my $rx = __validate_nonnegi_or_undef( shift() );
1967 0         0 $self->_logmethretifv([$rx]);
1968             # Okay if this points to one past the end
1969 0 0       0 $self->_check_rx($rx, 1) if defined $rx; # one_past_end_ok=1
1970 0         0 $$self->{first_data_rx} = $rx;
1971 0         0 $self
1972             }
1973             sub last_data_rx(;$) {
1974 0     0 1 0 my $self = &__self;
1975 0         0 my $last_data_rx = $$self->{last_data_rx};
1976 0 0       0 if (@_ == 0) { # 'get' request
1977 0         0 $self->_logmethretifv([], [$last_data_rx]);
1978 0         0 return $last_data_rx;
1979             }
1980 0         0 my $rx = __validate_nonnegi_or_undef( shift() );
1981 0         0 $self->_logmethretifv([$rx]);
1982 0 0       0 if (defined $rx) {
1983 0         0 $self->_check_rx($rx, 1); # one_past_end_ok=1
1984             confess "last_data_rx must be >= first_data_rx"
1985 0 0 0     0 unless $rx >= ($$self->{first_data_rx}//0);
1986             }
1987 0         0 $$self->{last_data_rx} = $rx;
1988 0         0 $self
1989             }
1990              
1991             # move_cols ">COLSPEC",source cols...
1992             # move_cols "absolute-position",source cols...
1993             sub move_cols($@) {
1994 0     0 1 0 my $self = &__selfmust;
1995 0         0 my ($posn, @sources) = @_;
1996              
1997 0         0 my ($num_cols, $rows) = @$$self{qw/num_cols rows/};
1998              
1999 0         0 my $to_cx = $self->_relspec2cx($posn);
2000              
2001 0         0 my @source_cxs = map { scalar $self->_spec2cx($_) } @sources;
  0         0  
2002 0         0 my @source_cxs_before = grep { $_ < $to_cx } @source_cxs;
  0         0  
2003 0         0 my $insert_offset = $to_cx - scalar(@source_cxs_before);
2004 0         0 my @rsorted_source_cxs = sort { $b <=> $a } @source_cxs;
  0         0  
2005              
2006             $self->_logmethifv(\__fmt_colspec_cx($posn,$to_cx), \" <-- ",
2007 0         0 \join(" ",map{"$source_cxs[$_]\[$_\]"} 0..$#source_cxs));
  0         0  
2008              
2009 0 0       0 croak "move destination is too far to the right\n"
2010             if $to_cx + @sources - @source_cxs_before > $num_cols;
2011              
2012 0         0 my @old_cxs = (0..$num_cols-1);
2013              
2014 0         0 foreach my $row (@$rows, \@old_cxs) {
2015 0         0 my @moving_cells = @$row[@source_cxs]; # save
2016 0         0 splice @$row, $_, 1 foreach (@rsorted_source_cxs); # delete
2017 0         0 splice @$row, $insert_offset, 0, @moving_cells; # put back
2018             };
2019              
2020 0         0 $self->_adjust_colx(\@old_cxs);
2021 0         0 $self
2022             }
2023 0     0 1 0 sub move_col($$) { goto &move_cols; }
2024              
2025             # insert_cols ">COLSPEC",new titles (or ""s or undefs if no title row)
2026             # insert_cols "absolute-position",...
2027             # RETURNS: The new colum indicies, or in scalar context the first cx
2028             sub insert_cols($@) {
2029 0     0 1 0 my $self = &__selfmust;
2030 0         0 my ($posn, @new_titles) = @_;
2031 0         0 my ($num_cols, $rows, $title_rx) = @$$self{qw/num_cols rows title_rx/};
2032              
2033 0         0 my $to_cx = $self->_relspec2cx($posn);
2034              
2035 0         0 $self->_logmethifv(\__fmt_colspec_cx($posn,$to_cx), \" <-- ", \avis(@new_titles));
2036              
2037 0   0     0 @new_titles = map { $_ // "" } @new_titles; # change undef to ""
  0         0  
2038 0     0   0 my $have_new_titles = first { $_ ne "" } @new_titles;
  0         0  
2039 0 0 0     0 if (!defined($title_rx) && $have_new_titles) {
2040 0         0 croak "insert_cols: Can not specify non-undef titles if title_rx is not defined\n"
2041             }
2042 0         0 my $num_insert_cols = @new_titles;
2043              
2044 0         0 foreach my $row (@$rows) {
2045 0 0 0     0 if (defined $title_rx && $row == $rows->[$title_rx]) {
2046 0         0 splice @$row, $to_cx, 0, @new_titles;
2047             } else {
2048 0         0 splice @$row, $to_cx, 0, (("") x $num_insert_cols);
2049             }
2050             }
2051 0         0 $$self->{num_cols} += $num_insert_cols;
2052              
2053 0         0 $self->_adjust_colx(
2054             [ 0..$to_cx-1, ((undef) x $num_insert_cols), $to_cx..$num_cols-1 ]
2055             );
2056              
2057 0         0 __first_ifnot_wantarray( $to_cx .. $to_cx+$num_insert_cols-1 )
2058             }
2059 0     0 1 0 sub insert_col($$) { goto &insert_cols }
2060              
2061             # sort_rows {compare function}
2062             # sort_rows {compare function} $first_rx, $last_rx
2063             sub sort_rows(&) {
2064 0     0 1 0 my $self = &__selfmust;
2065 0 0       0 croak "bad args" unless @_ == 1;
2066 0         0 my ($cmpfunc, $first_rx, $last_rx) = @_;
2067              
2068             my ($rows, $linenums, $title_rx, $first_data_rx, $last_data_rx)
2069 0         0 = @$$self{qw/rows linenums title_rx first_data_rx last_data_rx/};
2070              
2071 0 0 0     0 $first_rx //= $first_data_rx
      0        
2072             // (defined($title_rx) ? $title_rx+1 : 0);
2073 0   0     0 $last_rx //= $last_data_rx // $#$rows;
      0        
2074              
2075 0         0 $self->_logmethifv(\"(sorting rx ${first_rx}..${last_rx})");
2076              
2077 0 0       0 oops unless defined($first_rx);
2078 0 0       0 oops unless defined($last_rx);
2079 0         0 my $pkg = caller;
2080             my @indicies = sort {
2081 0         0 my @row_indicies = ($a, $b);
  0         0  
2082 3     3   49 no strict 'refs';
  3         7  
  3         23092  
2083 0         0 local ${ "$pkg\::a" } = $rows->[$a]; # actual row objects
  0         0  
2084 0         0 local ${ "$pkg\::b" } = $rows->[$b];
  0         0  
2085 0         0 $cmpfunc->(@row_indicies)
2086             } ($first_rx..$last_rx);
2087              
2088 0         0 @$rows[$first_rx..$#$rows] = @$rows[@indicies];
2089 0         0 @$linenums[$first_rx..$#$rows] = @$linenums[@indicies];
2090              
2091 0         0 __validate_not_scalar_context(0..$first_rx-1, @indicies, $last_rx+1..$#$rows)
2092             }
2093              
2094             sub delete_cols(@) {
2095 0     0 1 0 my $self = &__selfmust;
2096 0         0 my (@cols) = @_;
2097 0         0 my ($num_cols, $rows) = @$$self{qw/num_cols rows/};
2098              
2099 0         0 my @cxlist = $self->_colspecs_to_cxs_ckunique(\@cols);
2100              
2101 0         0 my @reverse_cxs = sort { $b <=> $a } @cxlist;
  0         0  
2102              
2103 0         0 $self->_logmethifv(reverse @reverse_cxs);
2104 0         0 my @old_cxs = (0..$num_cols-1);
2105 0         0 for my $row (@$rows, \@old_cxs) {
2106 0         0 foreach my $cx (@reverse_cxs) {
2107 0 0       0 oops if $cx > $#$row;
2108 0         0 splice @$row, $cx, 1, ();
2109             }
2110             }
2111 0         0 $$self->{num_cols} -= @reverse_cxs;
2112 0         0 $self->_adjust_colx(\@old_cxs);
2113 0         0 $self
2114             }
2115 0     0 1 0 sub delete_col($) { goto &delete_cols; }
2116              
2117             # Logic which forces verbose on when debug is on, etc.
2118             # Used by new() and options()
2119             sub _set_verbose_debug_silent(@) {
2120 6     6   14 my $self = shift;
2121 6         43 foreach (pairs @_) {
2122 12         32 my ($key, $val) = @$_;
2123 12         36 my $oldval = $$self->{$key};
2124             next
2125 12 50       34 unless !!$oldval != !!$val;
2126 0 0       0 if ($key eq "silent") {
    0          
    0          
2127 0         0 $$self->{$key} = $val;
2128             }
2129             elsif ($key eq "verbose") {
2130 0 0       0 if ($val) {
2131 0         0 $$self->{saved_silent} = $$self->{silent};
2132 0         0 $$self->{silent} = 0; #?? might still want to suppress warnings
2133             } else {
2134 0         0 $$self->{silent} = delete $$self->{saved_silent};
2135             }
2136             }
2137             elsif ($key eq "debug") {
2138 0 0       0 if ($val) {
2139 0         0 $$self->{saved_verbose} = $$self->{verbose};
2140 0         0 $$self->{saved_silent} = $$self->{silent};
2141 0         0 $$self->{silent} = 0;
2142 0         0 $$self->{verbose} = "forced by {debug}";
2143             } else {
2144 0         0 $$self->{verbose} = delete $$self->{saved_verbose};
2145 0         0 $$self->{silent} = delete $$self->{saved_silent};
2146             }
2147             }
2148 0         0 else { confess "options: Unknown option key '$key'\n"; }
2149 0         0 $$self->{$key} = $val;
2150             }
2151             }
2152              
2153             # Get or set option(s).
2154             # New settings may be in an {OPTIONS} hash and/or linear args.
2155             # Always returns the old options (key => value pairs).
2156             sub options(@) {
2157 0     0 1 0 my $self = &__self; # auto-create sheet if necessary
2158 0 0       0 my @old = map{ exists($$self->{$_}) ? ($_ => $$self->{$_}) : () }
  0         0  
2159             qw/verbose debug silent/;
2160            
2161 0         0 my %eff_args;
2162 0 0       0 if (@_ == 0) {
2163 0 0       0 croak "(list) returned but called in scalar or void context"
2164             unless wantarray;
2165             } else {
2166 0         0 my $opthash = &__opthash; # shift off 1st arg iff it is a hashref
2167 0         0 %eff_args = (%$opthash, &__validate_pairs);
2168 0         0 $self->_set_verbose_debug_silent(%eff_args);
2169             }
2170 0         0 $self->_logmethretifv([\__fmt_pairlist(%eff_args)], [\hvis(@old)]);
2171 0         0 @old;
2172             }
2173              
2174             sub _colspecs_to_cxs_ckunique {
2175 0 0   0   0 my ($self, $colspecs) = @_; oops unless @_==2;
  0         0  
2176 0         0 my @cxlist;
2177             my %seen;
2178 0         0 foreach (@$colspecs) {
2179 0         0 my $cx = $self->_spec2cx($_); # auto-detects title_rx if needed
2180 0 0       0 if ($seen{$cx}) {
2181 0         0 croak "cx $cx is specified by multiple COLSPECs: ", vis($_)," and ",vis($seen{$cx}),"\n";
2182             }
2183 0         0 $seen{ $cx } = $_;
2184 0         0 push @cxlist, $cx;
2185             }
2186             @cxlist
2187 0         0 }
2188              
2189             sub only_cols(@) {
2190 0     0 1 0 my $self = &__selfmust;
2191 0         0 my @cols = @_;
2192 0         0 my $rows = $self->rows;
2193              
2194             # Replace each row with just the surviving columns, in the order specified
2195 0         0 my @cxlist = $self->_colspecs_to_cxs_ckunique(\@cols);
2196 0         0 for my $row (@$rows) {
2197 0         0 @$row = map{ $row->[$_] } @cxlist;
  0         0  
2198             }
2199 0         0 $$self->{num_cols} = scalar(@cxlist);
2200 0         0 $self->_adjust_colx(\@cxlist);
2201 0         0 $self
2202             }
2203              
2204             # obj->join_cols separator_or_coderef, colspecs...
2205             # If coderef:
2206             # $_ is bound to the first-named column, and is the destination
2207             # @_ is bound to all named columns, in the order named.
2208             sub join_cols(&@) {
2209 0     0 1 0 my $self = &__selfmust;
2210 0         0 my ($separator, @sources) = @_;
2211 0         0 my $hash = $$self;
2212              
2213 0         0 my ($num_cols, $rows) = @$hash{qw/num_cols rows/};
2214              
2215 0         0 my @source_cxs = map { scalar $self->_spec2cx($_) } @sources;
  0         0  
2216             $self->_logmethifv(\"'$separator' ",
2217 0         0 \join(" ",map{"$source_cxs[$_]\[$_\]"} 0..$#source_cxs));
  0         0  
2218              
2219 0         0 my $saved_v = $hash->{verbose}; $hash->{verbose} = 0;
  0         0  
2220              
2221             # Merge the content into the first column. N.B. EXCLUDES title row.
2222             my $code = ref($separator) eq 'CODE'
2223             ? $separator
2224 0 0   0   0 : sub{ $_ = join $separator, @_ } ;
  0         0  
2225              
2226             # Note first/last_data_rx are ignored
2227 0   0     0 { my $first_rx = ($hash->{title_rx} // -1)+1;
  0         0  
2228 0         0 _apply_to_rows($self, $code, \@source_cxs, undef, $first_rx, undef);
2229             }
2230              
2231             # Delete the other columns
2232 0         0 $self->delete_cols(@source_cxs[1..$#source_cxs]);
2233              
2234 0         0 $$self->{verbose} = $saved_v;
2235 0         0 $self
2236             }
2237 0     0 1 0 sub join_cols_sep($@) { goto &join_cols } # to match the functional API
2238              
2239             sub rename_cols(@) {
2240 0     0 1 0 my $self = &__selfmust;
2241 0 0       0 croak "rename_cols expects an even number of arguments\n"
2242             unless scalar(@_ % 2)==0;
2243              
2244 0         0 my ($num_cols, $rows, $title_rx) = @$$self{qw/num_cols rows title_rx/};
2245              
2246 0 0       0 croak "rename_cols: No title_rx is defined!\n"
2247             unless defined $title_rx;
2248              
2249 0         0 my $title_row = $rows->[$title_rx];
2250              
2251 0         0 while (@_) {
2252 0         0 my $old_title = shift @_;
2253 0         0 my $new_title = shift @_;
2254 0         0 my $cx = $self->_spec2cx($old_title);
2255 0         0 $self->_logmethifv($old_title, \" -> ", $new_title, \" [cx $cx]");
2256 0 0       0 croak "rename_cols: Column $old_title is too large\n"
2257             if $cx > $#$title_row; # it must have been an absolute form
2258 0         0 $title_row->[$cx] = $new_title;
2259              
2260             # N.B. aliases remain pointing to the same columns regardless of names
2261             }
2262 0         0 $self->_rebuild_colx();
2263 0         0 $self
2264             }
2265              
2266             # apply {code}, colspec*
2267             # @_ are bound to the columns in the order specified (if any)
2268             # $_ is bound to the first such column
2269             # Only visit rows bounded by first_data_rx and/or last_data_rx,
2270             # starting with title_rx+1 if a title row is defined.
2271             sub apply(&;@) {
2272 1     1 1 6 my $self = &__selfmust;
2273 1         5 my ($code, @cols) = @_;
2274 1         9 my $hash = $$self;
2275 1         6 my @cxs = map { scalar $self->_spec2cx($_) } @cols;
  0         0  
2276              
2277 1   50     24 my $first_rx = max(($hash->{title_rx} // -1)+1, $hash->{first_data_rx}//0);
      50        
2278              
2279 1         10 @_ = ($self, $code, \@cxs, undef, $first_rx, $hash->{last_data_rx});
2280 1         19 goto &_apply_to_rows
2281             }
2282              
2283             # apply_all {code}, colspec*
2284             # Like apply, but ALL rows are visited, inluding the title row if any
2285             sub apply_all(&;@) {
2286 0     0 1 0 my $self = &__selfmust;
2287 0         0 my ($code, @cols) = @_;
2288 0         0 my $hash = $$self;
2289 0         0 my @cxs = map { scalar $self->_spec2cx($_) } @cols;
  0         0  
2290 0         0 log_methcall $self, [\"rx 0..",$#{$hash->{rows}},
2291             @cxs > 0 ? \(" cxs=".avis(@cxs)) : ()]
2292 0 0       0 if $$self->{verbose};
    0          
2293 0         0 @_ = ($self, $code, \@cxs);
2294 0         0 goto &_apply_to_rows
2295             }
2296              
2297             sub __arrify_checknotempty($) {
2298 5     5   18 local $_ = shift;
2299 5 100       46 my $result = ref($_) eq 'ARRAY' ? $_ : [ $_ ];
2300             croak "Invalid argument ",vis($_)," (expecting [array ref] or single value)\n"
2301 5 50 33     61 unless @$result > 0 && !grep{ref($_) || $_ eq ""} @$result;
  5 50       60  
2302 5         18 $result
2303             }
2304              
2305             # apply_torx {code} rx, colspec*
2306             # apply_torx {code} [rx list], colspec*
2307             # Only the specified row(s) are visited
2308             # first/last_data_rx are ignored.
2309             sub apply_torx(&$;@) {
2310 5     5 1 514 my $self = &__selfmust;
2311 5         35 my ($code, $rxlist_arg, @cols) = @_;
2312 5 50       72 croak "Missing rx (or [list of rx]) argument\n" unless defined $rxlist_arg;
2313 5         36 my $rxlist = __arrify_checknotempty($rxlist_arg);
2314 5         24 my @cxs = map { scalar $self->_spec2cx($_) } @cols;
  0         0  
2315             log_methcall $self, [\vis($rxlist_arg),
2316             @cxs > 0 ? \(" cxs=".avis(@cxs)) : ()]
2317 5 0       39 if $$self->{verbose};
    50          
2318 5         26 @_ = ($self, $code, \@cxs, $rxlist);
2319 5         72 goto &_apply_to_rows
2320             }
2321              
2322             # apply_exceptrx {code} [rx list], colspec*
2323             # All rows EXCEPT the specified rows are visited
2324             sub apply_exceptrx(&$;@) {
2325 0     0 1 0 my $self = &__selfmust;
2326 0         0 my ($code, $exrxlist_arg, @cols) = @_;
2327 0 0       0 croak "Missing rx (or [list of rx]) argument\n" unless defined $exrxlist_arg;
2328 0         0 my $exrxlist = __arrify_checknotempty($exrxlist_arg);
2329 0         0 my @cxs = map { scalar $self->_spec2cx($_) } @cols;
  0         0  
2330             log_methcall $self, [\vis($exrxlist_arg),
2331             @cxs > 0 ? \(" cxs=".avis(@cxs)) : ()]
2332 0 0       0 if $$self->{verbose};
    0          
2333 0         0 my $hash = $$self;
2334 0         0 my $max_rx = $#{ $hash->{rows} };
  0         0  
2335 0         0 foreach (@$exrxlist) {
2336 0 0 0     0 croak "rx $_ is out of range\n" if $_ < 0 || $_ > $max_rx;
2337             }
2338 0         0 my %exrxlist = map{ $_ => 1 } @$exrxlist;
  0         0  
2339 0         0 my $rxlist = [ grep{ ! exists $exrxlist{$_} } 0..$max_rx ];
  0         0  
2340 0         0 @_ = ($self, $code, \@cxs, $rxlist);
2341 0         0 goto &_apply_to_rows
2342             }
2343              
2344             # split_col {code} oldcol, newcol_start_position, new titles...
2345             # {code} is called for each row with $_ bound to <oldcol>
2346             # and @_ bound to the new column(s).
2347             # The old column is left as-is (not deleted).
2348             sub split_col(&$$$@) {
2349 0     0 1 0 my $self = &__selfmust;
2350 0         0 my ($code, $oldcol_posn, $newcols_posn, @new_titles) = @_;
2351              
2352 0         0 my $num_insert_cols = @new_titles;
2353 0         0 my $old_cx = $self->_spec2cx($oldcol_posn);
2354 0         0 my $newcols_first_cx = $self->_relspec2cx($newcols_posn);
2355              
2356             log_methcall $self, [\"... $oldcol_posn\[$old_cx] -> [$newcols_first_cx]",
2357             avis(@new_titles)]
2358 0 0       0 if $$self->{verbose};
2359              
2360 0         0 local $$self->{verbose} = 0;
2361              
2362 0         0 $self->insert_cols($newcols_first_cx, @new_titles);
2363              
2364 0 0       0 $old_cx += $num_insert_cols if $old_cx >= $newcols_first_cx;
2365              
2366 0         0 $self->apply($code,
2367             $old_cx, $newcols_first_cx..$newcols_first_cx+$num_insert_cols-1);
2368              
2369 0         0 $self
2370             }
2371              
2372             sub reverse_cols() {
2373 0     0 1 0 my $self = &__selfmust;
2374 0         0 my ($rows, $num_cols) = @$$self{qw/rows num_cols/};
2375 0 0       0 log_methcall $self, [] if $$self->{verbose};
2376 0         0 for my $row (@$rows) {
2377 0         0 @$row = reverse @$row;
2378             }
2379 0         0 $self->_adjust_colx([reverse 0..$num_cols-1]);
2380 0         0 $self
2381             }
2382              
2383             sub transpose() {
2384 0     0 1 0 my $self = &__selfmust;
2385 0 0       0 log_methcall $self, [] if $$self->{verbose};
2386              
2387 0         0 my ($rows, $old_num_cols, $linenums) = @$$self{qw/rows num_cols linenums/};
2388              
2389 0         0 $$self->{useraliases} = {};
2390 0         0 $$self->{title_rx} = undef;
2391 0         0 $$self->{first_data_rx} = undef;
2392 0         0 $$self->{last_data_rx} = undef;
2393              
2394             # Save a copy of the data
2395 0         0 my @old_rows = ( map{ [ @$_ ] } @$rows );
  0         0  
2396              
2397             # Rebuild the spreadsheet
2398 0         0 @$rows = ();
2399 0         0 $$self->{num_cols} = scalar @old_rows;
2400              
2401 0         0 for (my $ocx=0; $ocx < $old_num_cols; ++$ocx) {
2402 0         0 my @nrow;
2403 0         0 for my $row (@old_rows) {
2404 0   0     0 push @nrow, $row->[$ocx] // "";
2405             }
2406 0         0 push @$rows, \@nrow;
2407             }
2408 0 0       0 if ($$self->{saved_linenums}) {
2409 0         0 @$linenums = @{ $$self->{saved_linenums} };
  0         0  
2410 0         0 delete $$self->{saved_linenums};
2411             } else {
2412 0         0 $$self->{saved_linenums} = [ @$linenums ];
2413 0         0 @$linenums = ("?") x scalar @$rows;
2414             }
2415 0   0     0 $$self->{data_source} //= "";
2416 0         0 $$self->{data_source} .= " transposed";
2417              
2418 0         0 $self->_rows_replaced;
2419 0         0 $self
2420             }#transpose
2421              
2422             # delete_rows rx ...
2423             # delete_rows 'LAST' ...
2424             # delete_rows '$' ...
2425             sub delete_rows(@) {
2426 0     0 1 0 my $self = &__selfmust;
2427 0         0 my (@rowspecs) = @_;
2428              
2429             my ($rows, $linenums, $title_rx, $first_data_rx, $last_data_rx, $current_rx, $verbose)
2430 0         0 = @$$self{qw/rows linenums title_rx first_data_rx last_data_rx current_rx verbose/};
2431              
2432 0         0 foreach (@rowspecs) {
2433 0 0       0 $_ = $#$rows if /^(?:LAST|\$)$/;
2434 0         0 __validate_nonnegi($_, "rx to delete");
2435 0 0       0 croak "Invalid row index '$_'\n" unless $_ <= $#$rows;
2436             }
2437 0         0 my @rev_sorted_rxs = sort {$b <=> $a} @rowspecs;
  0         0  
2438 0 0       0 log_methcall $self, [reverse @rev_sorted_rxs] if $$self->{verbose};
2439              
2440             # Adjust if needed...
2441 0 0       0 if (defined $title_rx) {
2442 0         0 foreach (@rev_sorted_rxs) {
2443 0 0       0 if ($_ < $title_rx) { --$title_rx }
  0 0       0  
2444             elsif ($_ == $title_rx) {
2445             $self->_log("Invalidating titles because rx $title_rx is being deleted\n")
2446 0 0       0 if $$self->{verbose};
2447 0         0 $title_rx = undef;
2448 0         0 last;
2449             }
2450             }
2451 0         0 $$self->{title_rx} = $title_rx;
2452             }
2453 0 0       0 if (defined $first_data_rx) {
2454 0         0 foreach (@rev_sorted_rxs) {
2455 0 0       0 if ($_ <= $first_data_rx) { --$first_data_rx }
  0         0  
2456             }
2457 0         0 $$self->{first_data_rx} = $first_data_rx;
2458             }
2459 0 0       0 if (defined $last_data_rx) {
2460 0         0 foreach (@rev_sorted_rxs) {
2461 0 0       0 if ($_ <= $last_data_rx) { --$last_data_rx }
  0         0  
2462             }
2463 0         0 $$self->{last_data_rx} = $last_data_rx;
2464             }
2465              
2466             # Back up $current_rx to account for deleted rows.
2467             # $current_rx is left set to one less than the index of the "next" row if
2468             # we are in an apply(). That is, current_rx will be left still pointing to
2469             # the same row as before, or if that row has been deleted then the row
2470             # before that (or -1 if row zero was deleted).
2471 0 0       0 if (defined $current_rx) {
2472 0         0 foreach (@rev_sorted_rxs) {
2473 0 0       0 --$current_rx if ($_ <= $current_rx);
2474             }
2475 0         0 $$self->{current_rx} = $current_rx;
2476             }
2477              
2478             #warn "### BEFORE delete_rows rx (@rev_sorted_rxs):\n",
2479             # map( { " [$_]=(".join(",",@{$rows->[$_]}).")\n" } 0..$#$rows);
2480              
2481 0         0 for my $rx (@rev_sorted_rxs) {
2482 0         0 splice @$rows, $rx, 1, ();
2483 0         0 splice @$linenums, $rx, 1, ();
2484             }
2485              
2486             #warn "### AFTER delete_rows:\n",
2487             # map( { " [$_]=(".join(",",@{$rows->[$_]}).")\n" } 0..$#$rows);
2488             $self
2489 0         0 }#delete_rows
2490 0     0 0 0 sub delete_row($) { goto &delete_rows; }
2491              
2492             # $firstrx = insert_rows [rx [,count]]
2493             # $firstrx = insert_rows ['$'[,count]]
2494             sub insert_rows(;$$) {
2495 0     0 1 0 my $self = &__selfmust;
2496 0         0 my ($rx, $count) = @_;
2497 0   0     0 $rx //= 'END';
2498 0   0     0 $count //= 1;
2499              
2500             my ($rows, $linenums, $num_cols, $title_rx, $first_data_rx, $last_data_rx)
2501 0         0 = @$$self{qw/rows linenums num_cols title_rx first_data_rx last_data_rx/};
2502              
2503 0 0       0 $rx = @$rows if $rx =~ /^(?:END|\$)$/;
2504              
2505             log_methcall $self, [\"at rx $rx (count $count)"], [$rx]
2506 0 0       0 if $$self->{verbose};
2507              
2508 0         0 __validate_nonnegi($rx, "new rx");
2509              
2510 0 0 0     0 if (defined($title_rx) && $rx <= $title_rx) {
2511 0         0 $$self->{title_rx} = ($title_rx += $count);
2512             }
2513 0 0 0     0 if (defined($first_data_rx) && $rx <= $first_data_rx) {
2514 0         0 $$self->{first_data_rx} = ($first_data_rx += $count);
2515             }
2516 0 0 0     0 if (defined($last_data_rx) && $rx <= $last_data_rx) {
2517 0         0 $$self->{last_data_rx} = ($last_data_rx += $count);
2518             }
2519              
2520 0         0 for (1..$count) {
2521 0         0 splice @$rows, $rx, 0, [("") x $num_cols];
2522 0         0 splice @$linenums, $rx, 0, "??";
2523             }
2524              
2525 0         0 $rx;
2526             }
2527 0     0 1 0 sub insert_row(;$) { goto &insert_rows; }
2528              
2529             # read_spreadsheet $inpath [Spreadsheet::Edit::IO::OpenAsCSV options...]
2530             # read_spreadsheet $inpath [,iolayers =>... or encoding =>...]
2531             # read_spreadsheet $inpath [,{iolayers =>... or encoding =>... }] #OLD API
2532             # read_spreadsheet [{iolayers =>... or encoding =>... }, ] $inpath #NEW API
2533             #
2534             # Titles are auto-detected by default, but this may be controlled
2535             # via {OPTIONS}:
2536             # title_rx => rx # Don't autodetect; set the specified title_rx
2537             # title_rx => undef # Don't autodetect; no titles
2538             # OR: autodetect options, passed thru to title_rx()
2539             #
2540             sub read_spreadsheet($;@) {
2541 2     2 1 7 my ($self, $opthash, $inpath) = &__self_opthash_1arg;
2542 2         7 my $orig_opthash = { %$opthash };
2543              
2544 2         16 my $saved_stdopts = $self->_set_stdopts($opthash);
2545 2     2   17 scope_guard{ $self->_restore_stdopts($saved_stdopts) };
  2         9  
2546              
2547 2         11 my %csvopts = @sane_CSV_read_options;
2548             # Separate out Text::CSV options from %$opthash
2549 2         8 foreach my $key (Text::CSV::known_attributes()) {
2550             #$csvopts{$key} = delete $opthash{$key} if exists $opthash{$key};
2551 62 50       177 $csvopts{$key} = $opthash->{$key} if defined $opthash->{$key};
2552 62         91 delete $opthash->{$key};
2553             }
2554 2         15 $csvopts{escape_char} = $csvopts{quote_char}; # " : """
2555              
2556             croak "Obsolete {sheet} key in options (use 'sheetname')"
2557 2 50       5 if exists $opthash->{sheet};
2558              
2559 2         18 __validate_opthash( $opthash,
2560             [
2561             qw/title_rx/,
2562             qw/iolayers encoding verbose silent debug/,
2563             qw/tempdir use_gnumeric raw_values sheetname/, # for OpenAsCsv
2564             qw/required min_rx max_rx first_cx last_cx/, # for title_rx
2565             ],
2566             desc => "read_spreadsheet option",
2567             undef_ok => [qw/title_rx verbose silent debug use_gnumeric/] );
2568              
2569             # convert {encoding} to {iolayers}
2570 2 50       8 if (my $enc = delete $opthash->{encoding}) {
2571             #warn "Found OBSOLETE read_spreadsheet 'encoding' opt (use iolayers instead)\n";
2572 0   0     0 $opthash->{iolayers} = ($opthash->{iolayers}//"") . ":encoding($enc)";
2573             }
2574             # Same as last-used, if any
2575             # N.B. If user says nothing, OpenAsCsv() defaults to UTF-8
2576 2   50     20 $opthash->{iolayers} //= $$self->{iolayers} // "";
      33        
2577              
2578             my ($rows, $linenums, $meta_info, $verbose, $debug)
2579 2         7 = @$$self{qw/rows linenums meta_info verbose debug/};
2580              
2581             ##$self->_check_currsheet;
2582              
2583 2         11 my $hash;
2584 2         4 { local $$self->{verbose} = 0;
  2         18  
2585             $hash = OpenAsCsv(
2586             inpath => $inpath,
2587             debug => $$self->{debug},
2588             silent => $$self->{silent},
2589 2   33     22 verbose => ($$self->{verbose} || $$self->{debug}),
2590             %$opthash, # all our opts are valid here
2591             );
2592             }
2593              
2594             ### TODO: Split off the following into a separate read_csvdata() method
2595             ### which takes a file handle? This might be useful so users
2596             ### can open arbitrary sources (even a pipe) and parse the data
2597             ### (e.g. /etc/passwd with : as the separator).
2598             ### ...but unclear how to handle encoding
2599              
2600             # Save possibly-defaulted iolayers for use in subsequent write_csv
2601 2   33     17 $$self->{iolayers} //= $hash->{iolayers};
2602              
2603 2         4 my $fh = $hash->{fh};
2604              
2605 2         5 $csvopts{keep_meta_info} = 1;
2606 2 50       11 my $csv = Text::CSV->new (\%csvopts)
2607             or croak "read_spreadsheet: ".Text::CSV->error_diag ()
2608             .dvis('\n## %csvopts\n');
2609              
2610 2         415 undef $$self->{num_cols};
2611 2         23 @$rows = ();
2612 2         6 @$linenums = ();
2613 2         3 my $lnum = 1;
2614 2         82 while (my $F = $csv->getline( $fh )) {
2615 6         407 push(@$linenums, $lnum);
2616 6         26 my @minfo = $csv->meta_info();
2617             # Force quoting of fields which look like negative numbers with an ascii
2618             # minus (\x{2D}) rather than Unicode math minus (\N{U+2212}).
2619             # This prevents conversion to the Unicode math minus when LibreOffice
2620             # reads the CSV. The assumption is that if the input, when converted
2621             # TO a csv, has an ascii minus then the original spreadsheet cell format
2622             # was "text" not numeric.
2623 6         62 for my $cx (0..$#$F) {
2624             #...TODO $minfo[$cx] |= 0x0001 if $F->[$cx] =~ /^-[\d.]+$/a;
2625             }
2626 6         14 push(@$meta_info, \@minfo);
2627 6         18 $lnum = $.+1;
2628 6         20 push(@$rows, $F);
2629             }
2630 2 50       215 close $fh || croak "Error reading $hash->{csvpath}: $!\n";
2631              
2632             $$self->{data_source} =
2633 2         16 form_spec_with_sheetname($hash->{inpath}, $hash->{sheetname});
2634 2         13 $$self->{sheetname} = $hash->{sheetname}; # possibly undef
2635              
2636 2         9 $self->_rows_replaced;
2637              
2638             # Set title_rx, either to a value explicitly given in OPTIONS (possibly
2639             # undef, meaning no titles) or else auto-detect.
2640 2         4 my %autodetect_opts;
2641 2         4 foreach (qw/required min_rx max_rx first_cx last_cx/) {
2642 10 50       22 $autodetect_opts{$_} = $opthash->{$_} if exists($opthash->{$_});
2643             }
2644            
2645 2 100       6 my $arg = exists($opthash->{title_rx}) ? $opthash->{title_rx} : 'auto';
2646 2         3 { local $$self->{cmd_nesting} = $$self->{cmd_nesting} + 1;
  2         6  
2647 2         4 $autodetect_opts{verbose} = 0; # suppress logging
2648 2         7 $self->title_rx(\%autodetect_opts, $arg);
2649             }
2650              
2651             log_methcall $self, [$orig_opthash, $inpath,
2652             \" [title_rx set to ",vis($$self->{title_rx}),\"]"]
2653 2 50       10 if $$self->{verbose};
2654              
2655 2         36 $self
2656             }#read_spreadsheet
2657              
2658             # write_csv {OPTHASH} "/path/to/output.csv"
2659             # Cells will be quoted if the input was quoted, i.e. if indicated by meta_info.
2660             sub write_csv(*;@) {
2661 0     0 1 0 my $self = &__selfmust;
2662 0 0       0 my $opts = ref($_[0]) eq 'HASH' ? shift() : {};
2663 0         0 my $dest = shift;
2664              
2665 0         0 my %csvopts = ( @sane_CSV_write_options,
2666             quote_space => 0, # dont quote embedded spaces
2667             );
2668             # Separate out Text::CSV options from {OPTIONS}
2669 0         0 foreach my $key (Text::CSV::known_attributes()) {
2670 0 0       0 $csvopts{$key} = $opts->{$key} if defined $opts->{$key};
2671 0         0 delete $opts->{$key};
2672             }
2673              
2674 0         0 { my %notok = %$opts;
  0         0  
2675 0         0 delete $notok{$_} foreach (
2676             #removed above... Text::CSV::known_attributes(),
2677             qw/verbose silent debug/,
2678             );
2679 0 0       0 croak "Unrecognized OPTION(s): ",avislq(keys %notok) if %notok;
2680             }
2681              
2682 0   0     0 $opts->{iolayers} //= $$self->{iolayers} // "";
      0        
2683             # New API: opts->{iolayers} may have all 'binmode' arguments.
2684             # If it does not include encoding(...) then insert default
2685 0 0       0 if ($opts->{iolayers} !~ /encoding\(|:utf8/) {
2686 0   0     0 $opts->{iolayers} .= ":encoding(".
2687             ($self->input_encoding() || DEFAULT_WRITE_ENCODING)
2688             .")";
2689             }
2690 0 0       0 if ($opts->{iolayers} !~ /:(?:crlf|raw)\b/) {
2691             # Use platform default
2692             #$opts->{iolayers} .= ":crlf";
2693             }
2694              
2695             my ($rows, $meta_info, $num_cols, $verbose, $debug)
2696 0         0 = @$$self{qw/rows meta_info num_cols verbose debug/};
2697              
2698 0         0 my $fh;
2699 0 0       0 if (openhandle($dest)) { # an already-open file handle?
2700             log_methcall $self, [$opts, \("<file handle specified> $opts->{iolayers} "
2701             .scalar(@$rows)." rows, $num_cols columns)")]
2702 0 0       0 if $$self->{verbose};
2703 0         0 $fh = $dest;
2704             } else {
2705             log_methcall $self, [$opts, \($dest." $opts->{iolayers} ("
2706             .scalar(@$rows)." rows, $num_cols columns)")]
2707 0 0       0 if $$self->{verbose};
2708 0 0 0     0 croak "Output path suffix must be *.csv, not\n ",qsh($dest),"\n"
2709             if $dest =~ /\.([a-z]*)$/ && lc($1) ne "csv";
2710 0 0       0 open $fh,">$dest" or croak "$dest: $!\n";
2711             }
2712              
2713 0 0       0 binmode $fh, $opts->{iolayers} or die "binmode:$!";
2714              
2715             # Arrgh. Although Text::CSV is huge and complex and implements a complicated
2716             # meta_info mechanism to capture quoting details on input, there is no way to
2717             # use the captured info to specify quoting of output fields!
2718             # So we implement writing CSVs by hand here.
2719             #my $csv = Text::CSV->new (\%csvopts)
2720             # or die "write_csv: ".Text::CSV->error_diag ();
2721             #foreach my $row (@$rows) {
2722             # oops "UNDEF row" unless defined $row; # did user modify @rows?
2723             # $csv->print ($fh, $row);
2724             #};
2725              
2726             # 5/2/22 FIXME: Maybe meta_info could be used when writing, albiet in
2727             # a grotesque way:
2728             # If keep_meta_info is set > 9, then the output quotation style is
2729             # "like it was used in the input of the the last parsed record"; so
2730             # we could "parse" a dummy record to set the quote style before writing
2731             # each record, like this (see perldoc Text::CSV_XS "keep_meta_info"):
2732             # my $csv = Text::CSV_XS->new({ binary=>1, keep_meta_info=>11,
2733             # quote_space => 0 });
2734             # apply_all {
2735             # my $minfo = $meta_info[$rx];
2736             # my @dummy = map{ '', 'x', '""' or '"x'' } @$minfo; # HOW?
2737             # $csv->parse(join ",", @dummy); # set saved meta_info
2738             # $csv->print(*OUTHANDLE, $row);
2739             # }
2740             #
2741              
2742             # Much of the option handling code was copied from Text::CSV_PP.pm
2743             # which depends on default values of options we don't specify explicitly.
2744             # So create a Text::CSV object just to get the effective option values...
2745 0         0 { my $o = Text::CSV->new( \%csvopts );
  0         0  
2746 0         0 foreach my $key (Text::CSV::known_attributes()) {
2747 0         0 $csvopts{$key} = $o->{$key};
2748             }
2749             }
2750              
2751             my $re_esc = ($csvopts{escape_char} ne '' and $csvopts{escape_char} ne "\0")
2752 0 0 0     0 ? ($csvopts{quote_char} ne '') ? qr/(\Q$csvopts{quote_char}\E|\Q$csvopts{escape_char}\E)/ : qr/(\Q$csvopts{escape_char}\E)/
    0          
2753             : qr/(*FAIL)/;
2754 0         0 for my $rx (0..$#$rows) {
2755 0         0 my $row = $rows->[$rx];
2756 0         0 my $minfo = $meta_info->[$rx];
2757 0         0 my @results;
2758 0         0 for my $cx (0..$num_cols-1) {
2759 0         0 my $value = $row->[$cx];
2760 0 0       0 confess "ERROR: rx $rx, cx $cx : undef cell value" unless defined($value);
2761 0         0 my $mi = $minfo->[$cx]; # undef if input was missing columns in this row
2762             my $must_be_quoted = $csvopts{always_quote} ||
2763 0   0     0 (($mi//0) & 0x0001); # was quoted on input
2764 0 0       0 unless ($must_be_quoted) {
2765 0 0       0 if ($value eq '') {
2766 0 0       0 $must_be_quoted = 42 if $csvopts{quote_empty};
2767             } else {
2768 0 0       0 if ($csvopts{quote_char} ne '') {
2769 3     3   41 use bytes;
  3         7  
  3         64  
2770             $must_be_quoted=43 if
2771             ($value =~ /\Q$csvopts{quote_char}\E/) ||
2772             ($csvopts{sep_char} ne '' and $csvopts{sep_char} ne "\0" and $value =~ /\Q$csvopts{sep_char}\E/) ||
2773             ($csvopts{escape_char} ne '' and $csvopts{escape_char} ne "\0" and $value =~ /\Q$csvopts{escape_char}\E/) ||
2774             ($csvopts{quote_binary} && $value =~ /[\x00-\x1f\x7f-\xa0]/) ||
2775 0 0 0     0 ($csvopts{quote_space} && $value =~ /[\x09\x20]/);
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
2776             }
2777             }
2778             }
2779 0         0 $value =~ s/($re_esc)/$csvopts{escape_char}$1/g;
2780 0 0       0 if ($csvopts{escape_null}) {
2781 0         0 $value =~ s/\0/$csvopts{escape_char}0/g;
2782             }
2783 0 0       0 if ($must_be_quoted) {
2784 0         0 $value = $csvopts{quote_char} . $value . $csvopts{quote_char};
2785             }
2786 0 0       0 $fh->print($csvopts{sep_char}) unless $cx==0;
2787 0         0 $fh->print($value);
2788             }
2789 0         0 $fh->print($csvopts{eol});
2790             }
2791              
2792 0 0       0 if (! openhandle $dest) {
2793 0 0       0 close $fh || croak "Error writing $dest : $!\n";
2794             }
2795             $self
2796 0         0 }#write_csv
2797              
2798             # Write spreadsheet with specified column formats
2799             # {col_formats} is required
2800             # Unless {sheetname} is specified, the sheet name is the outpath basename
2801             # sans any suffix
2802             sub write_spreadsheet(*;@) {
2803 0     0 1 0 my ($self, $opts, $outpath) = &__self_opthash_1arg;
2804 0         0 my $colx = $$self->{colx};
2805              
2806 0 0       0 log_methcall $self, [$opts, $outpath] if $$self->{verbose};
2807              
2808             # {col_formats} may be [list of formats in column order]
2809             # or { COLSPEC => fmt, ..., __DEFAULT__ => fmt }
2810             # Transform the latter to the former...
2811 0   0     0 my $cf = $opts->{col_formats} // croak "{col_formats} is required";
2812 0 0       0 if (ref($cf) eq "HASH") {
2813 0         0 my ($default, @ary);
2814 0         0 while (my ($key, $fmt) = each %$cf) {
2815 0 0       0 ($default = $fmt),next if $key eq "__DEFAULT__";
2816 0   0     0 my $cx = $colx->{$key} // croak("Invalid COLSPEC '$key' in col_formats");
2817 0         0 $ary[$cx] = $fmt;
2818             }
2819 0 0       0 foreach (@ary) { $_ = $default if ! defined; }
  0         0  
2820 0         0 $cf = \@ary;
2821             }
2822 0         0 local $opts->{col_formats} = $cf;
2823              
2824 0         0 my ($csvfh, $csvpath) = tempfile(SUFFIX => ".csv");
2825 0         0 { local $$self->{verbose} = 0;
  0         0  
2826 0         0 $self->write_csv($csvfh, silent => 1, iolayers => ':encoding(UTF-8)',
2827             @sane_CSV_write_options);
2828             }
2829 0 0       0 close $csvfh or die "Error writing $csvpath : $!";
2830              
2831             # Default sheet name to output file basename sans suffix
2832 0   0     0 $opts->{sheetname} //= fileparse($outpath, qr/\.\w+/);
2833              
2834 0         0 convert_spreadsheet($csvpath,
2835             %$opts,
2836             iolayers => ':encoding(UTF-8)',
2837             cvt_from => "csv",
2838             outpath => $outpath,
2839             );
2840 0         0 $self
2841             }
2842              
2843             #====================================================================
2844             # These helpers are used by predefined magic sheet variables.
2845             # See code in Spreadsheet::Edit::import()
2846              
2847             # Return $self if during an apply, or if being examined by Data::Dumper ;
2848             # otherwise croak
2849             sub _onlyinapply {
2850 7     7   24 my ($self, $accessor) = @_;
2851 7 50       29 unless (defined $$self->{current_rx}) {
2852 0         0 for (my $lvl=2; ;$lvl++) {
2853 0   0     0 my $pkg = (caller($lvl))[0] || last;
2854 0 0 0     0 return $self
2855             if defined($pkg) && $pkg->isa("Data::Dumper") # perldoc UNIVERSAL
2856             }
2857 0         0 croak "Can't use $accessor now: Not during apply*\n"
2858             }
2859             $self
2860 7         17 }
2861             sub __getsheet($$$$) {
2862 9     9   33 my ($mutating, $pkg, $uvar, $onlyinapply) = @_;
2863 9         35 my $sheet = $pkg2currsheet{$pkg};
2864 9 50       33 croak("Modifying variable $uvar is not allowed\n")
2865             if $mutating;
2866 9 50       72 croak("Can not use $uvar: No sheet is currently valid for package $pkg\n")
2867             unless defined $sheet;
2868 9 100       56 $onlyinapply ? _onlyinapply($sheet, $uvar) : $sheet
2869             }
2870             sub _scal_tiehelper { # access a scalar sheet variable
2871 5     5   602 my($mutating, $pkg, $uvar, $ident, $onlyinapply) = @_;
2872 5         29 my $sheet = __getsheet($mutating, $pkg, $uvar, $onlyinapply);
2873 5 50       34 confess avisq(@_) unless exists $$sheet->{$ident};
2874 5         44 return \$$sheet->{$ident}; # return ref to the scalar
2875             }
2876             sub _aryelem_tiehelper { # access an element of an array sheet variable
2877 0     0   0 my($mutating, $pkg, $uvar, $index_ident, $array_ident, $onlyinapply) = @_;
2878             # E.g. for $title_row : index_ident="title_rx" and array_ident="rows"
2879 0         0 my $sheet = __getsheet($mutating, $pkg, $uvar, $onlyinapply);
2880 0   0     0 my $aref = $$sheet->{$array_ident} // oops dvisq '$array_ident @_'; # e.g. {rows}
2881 0   0     0 my $index = $$sheet->{$index_ident} // do{
2882 0 0 0     0 if ($index_ident eq "current_rx" or $index_ident eq "title_rx") {
2883             return \undef # During Data::Dumper inspection of current_row?
2884 0         0 }
2885 0         0 oops dvis '$array_ident $index_ident'; # otherwise it's a bug
2886             };
2887 0 0       0 oops(dvisq '@_ $index') if $index > $#$aref;
2888 0         0 return \$aref->[$index]; # return ref to scalar (the element in the array)
2889             }
2890             sub _refval_tiehelper { # access a sheet variable which is a ref of some kind
2891 4     4   1612261 my($mutating, $pkg, $uvar, $field_ident, $onlyinapply, $mutable) = @_;
2892 4 50       30 $mutating = 0 if $mutable;
2893 4         28 my $sheet = __getsheet($mutating, $pkg, $uvar, $onlyinapply);
2894 4         149 return $$sheet->{$field_ident}; # return the value, which is itself a ref
2895             }
2896              
2897             # Retrieve the sheet currently accessed by the functional API & tied globals
2898             # in the caller's package (or the specified package).
2899             # If an argument is passed, change the sheet to the specified sheet.
2900             #
2901             # Always returns the previous sheet (or undef)
2902             sub sheet(;$$) {
2903 14     14 1 539 my $opthash = &__opthash;
2904 14   66     66 my $pkg = $opthash->{package} // caller();
2905 14 50       51 oops if index($pkg,__PACKAGE__) >= 0; # not us or sub-pkg
2906 14 100       45 my $pkgmsg = $opthash->{package} ? " [for pkg $pkg]" : "";
2907 14         37 my $curr = $pkg2currsheet{$pkg};
2908 14   33     80 my $verbose = $opthash->{verbose} || ($curr && $$curr->{verbose});
2909 14 100       128 if (@_) {
2910 12         50 my $new = __validate_sheet_arg(shift @_);
2911 12 50       31 croak "Extraneous argument(s) in call to sheet()" if @_;
2912 12 100       79 if (defined $new) {
2913 9 50       38 oops if $$new->{cmd_nesting};
2914 9   33     48 $verbose ||= $$new->{verbose};
2915             }
2916              
2917 12 0       28 log_call [$opthash, \(" ".fmt_sheet($new)),
    50          
2918             \(u($curr) eq u($new)
2919             ? " [no change]"
2920             : " [previous: ".fmt_sheet($curr)."]"),
2921             \$pkgmsg]
2922             if $verbose;
2923              
2924 12         27 $pkg2currsheet{$pkg} = $new;
2925             } else {
2926 2 50       4 log_call [$opthash], [\fmt_sheet($curr), \$pkgmsg]
2927             if $verbose;
2928             }
2929 14         152 $curr
2930             }
2931              
2932             #====================================================================
2933             package
2934             Spreadsheet::Edit::RowsTie; # implements @rows and @$sheet
2935 3     3   6735 use parent 'Tie::Array';
  3         9  
  3         34  
2936              
2937 3     3   2154 use Carp;
  3         6  
  3         352  
2938             #our @CARP_NOT = qw(Tie::Indirect Tie::Indirect::Array
2939             # Tie::Indirect::Hash Tie::Indirect::Scalar);
2940 3         32 use Data::Dumper::Interp 6.004 qw/visnew
2941             vis viso avis alvis ivis dvis hvis hlvis
2942             visq visoq avisq alvisq ivisq dvisq hvisq hlvisq
2943 3     3   23 addrvis rvis rvisq u quotekey qsh qshlist qshpath/;
  3         66  
2944              
2945 3     3   4644 use Scalar::Util qw(looks_like_number weaken);
  3         7  
  3         2164  
2946 0     0   0 sub oops(@) { goto &Spreadsheet::Edit::oops }
2947              
2948             sub TIEARRAY {
2949 4     4   12 my ($classname, $sheet) = @_;
2950 4         20 my $o = bless [ [], $sheet], $classname;
2951 4         28 weaken $o->[1];
2952 4         11 $o
2953             }
2954             sub FETCH {
2955 47     47   81 my ($this, $index) = @_;
2956 47         82 my $aref = $this->[0];
2957 47 50 33     160 croak "Index ",u($index)," is invalid or out of range"
2958             unless $index >= 0 && $index <= $#$aref;
2959 47         170 $aref->[$index];
2960             }
2961             sub STORE {
2962 12     12   62 my ($this, $index, $val) = @_;
2963 12         22 my ($aref, $sheet) = @$this;
2964 12 50 33     58 croak "Index ",u($index)," is invalid or out of range"
2965             unless $index >= 0 && $index <= $#$aref+1;
2966 12 50       30 croak "Value must be a ref to array of cell values (not $val)"
2967             if ! Spreadsheet::Edit::__looks_like_aref($val);
2968             croak "Cell values may not be undef"
2969 12 50       30 if grep{! defined} @$val;
  30         69  
2970             croak "Cell values must be strings or numbers"
2971 12 50       23 if grep{ ref($_) && !looks_like_number($_) } @$val;
  30 50       79  
2972 12 50       95 if (my $num_cols = $$sheet->{num_cols}) {
2973 0 0       0 croak "New row must contain $num_cols cells (not ", $#$val+1, ")"
2974             if @$val != $num_cols;
2975             }
2976             # else (0 or undef) someone promises to set it later
2977              
2978             # Store a *copy* of the data to dispose of a Magicrow wrapper, if present
2979 12         45 my $cells = [ @$val ];
2980 12         49 $aref->[$index] = Spreadsheet::Edit::Magicrow->new($sheet, $cells);
2981             }
2982 245     245   453 sub FETCHSIZE { scalar @{ $_[0]->[0] } }
  245         947  
2983             sub STORESIZE {
2984 2     2   11 my ($this, $newlen) = @_;
2985 2         4 $#{ $this->[0] } = $newlen-1;
  2         11  
2986             }
2987             # End packageSpreadsheet::Edit::RowsTie
2988              
2989             #====================================================================
2990             package
2991             Spreadsheet::Edit::Magicrow;
2992              
2993 3     3   24 use Carp;
  3         8  
  3         322  
2994             our @CARP_NOT = qw(Spreadsheet::Edit);
2995 3     3   23 use Scalar::Util qw(weaken blessed looks_like_number);
  3         7  
  3         381  
2996 0     0   0 sub oops(@) { goto &Spreadsheet::Edit::oops }
2997 3     3   32 use Data::Dumper::Interp;
  3         17  
  3         16  
2998              
2999             sub new {
3000 12     12   24 my ($classname, $sheet, $cells) = @_;
3001 12         18 my %hashview; tie %hashview, __PACKAGE__, $cells, $sheet;
  12         38  
3002 12         71 bless \ [$cells, \%hashview], $classname;
3003             }
3004 110     110   149 use overload '@{}' => sub { ${ shift() }->[0] },
  110         309  
3005 4     4   12 '%{}' => sub { ${ shift() }->[1] },
  4         61  
3006             #'""' => sub { shift }, # defeats vis overload eval!
3007             #'0+' => sub { shift },
3008             #'==' => sub { my ($self, $other, $swap) = @_; $self == $other },
3009             #'eq' => sub { my ($self, $other, $swap) = @_; "$self" eq "$other" },
3010 3         50 fallback => 1, # for "" etc. FIXME: is this really ok?
3011 3     3   1849 ;
  3         10  
3012              
3013             sub TIEHASH {
3014 12     12   20 my ($pkg, $cells, $sheet) = @_;
3015 12         30 my $o = bless \ [$cells, $sheet], $pkg;
3016 12         96 weaken $$o->[1];
3017 12         29 $o
3018             }
3019             sub _cellref {
3020 4     4   10 my ($cells, $sheet) = @{ ${ shift() } }; # First arg is 'self'
  4         7  
  4         22  
3021 4         13 my $key = shift; # Second arg is key
3022 4         10 my $mutating = @_; # Third arg exists only for STORE
3023 4         14 my $colx = $$sheet->{colx};
3024 4         14 my $cx = $colx->{$key};
3025              
3026 4 50       21 if (! defined $cx) {
3027 0 0       0 exists($colx->{$key})
3028             or croak "'$key' is an unknown COLSPEC. The valid keys are:\n",
3029             $sheet->_fmt_colx();
3030             # Undef colx results from alias({optional => TRUE},...) which failed,
3031             # or from an alias which became invalid because the column was deleted.
3032 0 0       0 croak "Attempt to write to 'optional' alias '$key' which is currently NOT DEFINED"
3033             if $mutating;
3034             return \undef # Reading such a column returns undef
3035 0         0 }
3036 4   33     6 $cx <= $#{$cells}
  4         20  
3037             // croak "BUG?? key '$key' maps to cx $cx which is out of range!";
3038 4         56 \$cells->[$cx]
3039             }
3040             sub FETCH {
3041 4     4   23 ${ &_cellref }
  4         17  
3042             }
3043             sub STORE {
3044 0     0     my $r = &_cellref;
3045 0           $$r = shift;
3046             }
3047             sub NEXTKEY {
3048 0     0     my (undef, $sheet) = @{ ${ shift() } };
  0            
  0            
3049 0           each %{ $$sheet->{colx} }
  0            
3050             }
3051             sub FIRSTKEY {
3052 0     0     my (undef, $sheet) = @{ ${ shift() } };
  0            
  0            
3053 0           my $colx = $$sheet->{colx};
3054 0           my $a = scalar keys %$colx; # reset iterator
3055 0           each %$colx;
3056             }
3057             sub EXISTS {
3058 0     0     my (undef, $sheet) = @{ ${ shift() } };
  0            
  0            
3059 0           my $key = shift;
3060 0           exists $$sheet->{colx}->{$key}
3061             }
3062             sub SCALAR {
3063 0     0     my (undef, $sheet) = @{ ${ shift() } };
  0            
  0            
3064 0           scalar %{ $$sheet->{colx} }
  0            
3065             }
3066 0     0     sub DELETE { confess "DELETE not allowed for ".__PACKAGE__ }
3067 0     0     sub CLEAR { confess "CLEAR not allowed for ".__PACKAGE__ }
3068              
3069             # End package Spreadsheet::Edit::Magicrow;
3070             #====================================================================
3071              
3072             1;
3073             __END__
3074              
3075             =pod
3076              
3077             =encoding UTF-8
3078              
3079             =head1 NAME
3080              
3081             Spreadsheet::Edit - Slice and dice spreadsheets, optionally using tied variables.
3082              
3083             =head1 NON-OO SYNOPSIS
3084              
3085             use Spreadsheet::Edit qw(:all);
3086              
3087             # Examples assume a spreadsheet with these titles in the first row:
3088             # "Account Number" "Customer's Name" "Email" "Home-phone" "Income"
3089              
3090             read_spreadsheet "mailing_list.xls!Sheet1";
3091              
3092             # alias an identifier to a long or complicated title
3093             alias Name => qr/customer/i; # matches "Customer's Name"
3094              
3095             # ------------ without tied column variables -----------
3096              
3097             # Print the data
3098             printf "%20s %8s %8s %-13s %s\n", "Name","A/N","Income","Phone","Email";
3099             apply {
3100             printf "%20s %8d %8.2f %-13s %s\n",
3101             $crow{Name}, # this key is an explicit alias
3102             $crow{"Account Number"}, # ...actual title
3103             $crow{Income}, # ...actual title
3104             $crow{Home_phone}, # ...auto-generated alias
3105             $crow{Email} ; # ...actual title
3106             };
3107              
3108             # Randomly access rows.
3109             print "Row 42: Column 'C' is ", $rows[41]{C}, "\n";
3110             print "Row 42: Customer's Name is ", $rows[41]{Name}, "\n";
3111             print "Row 42: 3rd column is ", $rows[41][2], "\n";
3112              
3113             # Split the "Customer's Name" into separate FName and LName columns
3114             insert_cols '>Name', "FName", "LName";
3115             apply {
3116             ($crow{FName}, $crow{LName}) = ($crow{Name} =~ /(.*) (.*)/)
3117             or die logmsg "Could not parse Name"; # logmsg adds current row number
3118             };
3119             delete_cols "Name";
3120              
3121             # Sort by last name
3122             sort_rows { $a->{LName} cmp $b->{LName} };
3123              
3124             # ------------ using tied column variables -----------
3125              
3126             our $Name; # 'Name' is the explicit alias created above
3127             our $Account_Number; # Auto-generated alias for "Account Number"
3128             our $Home_phone; # ditto
3129             our $Income; # 'Income' is an actual title
3130             our $Email; # ditto
3131             our ($FName, $LName); # These columns do not yet exist
3132              
3133             tie_column_vars "Name", "Account_Number", qr/phone/, qr/^inc/i, "FName", "LName";
3134              
3135             # Print the data
3136             printf "%20s %8s %8s %-13s %s\n", "Name","A/N","Income","Phone","Email";
3137             apply {
3138             printf "%20s %8d %8.2f %-13s%s\n",
3139             $Name, $Account_Number, $Income, $Home_phone, $Email;
3140             };
3141              
3142             # Split the "Customer's Name" into separate FName and LName columns
3143             insert_cols '>Name', "FName", "LName";
3144             apply {
3145             ($FName, $LName) = ($Name =~ /^(\S+) (\S+)$/)
3146             or die logmsg "Could not parse Name";
3147             };
3148             delete_cols "Name";
3149              
3150             # Simple mail-merge
3151             use POSIX qw(strftime);
3152             apply {
3153             return
3154             if $Income < 100000; # not in our audience
3155             open SENDMAIL, "|sendmail -t -oi" || die "pipe:$!";
3156             print SENDMAIL "To: $FName $LName <$Email>\n";
3157             print SENDMAIL strftime("Date: %a, %d %b %y %T %z\n", localtime(time));
3158             print SENDMAIL <<EOF ;
3159             From: sales\@example.com
3160             Subject: Help for the 1%
3161              
3162             Dear $FName,
3163             If you have disposable income, we can help with that.
3164             Sincerely,
3165             Your investment advisor.
3166             EOF
3167             close SENDMAIL || die "sendmail failed ($?)\n";
3168             };
3169              
3170             # ------------ multiple sheets --------------
3171              
3172             our ($Foo, $Bar, $Income);
3173              
3174             read_spreadsheet "file1.csv";
3175             tie_column_vars; # tie all vars that ever become valid
3176              
3177             my $s1 = sheet undef ; # Save ref to current sheet & forget it
3178              
3179             read_spreadsheet "file2.csv"; # Auto-creates sheet bc current is undef
3180             tie_column_vars;
3181              
3182             my $s2 = sheet ; # Save ref to second sheet
3183              
3184             print "$Foo $Bar $Income\n"; # these refer to $s2, the current sheet
3185              
3186             sheet $s1 ;
3187             print "$FName $LName $Income\n"; # these now refer to the original sheet
3188              
3189             # ------------ create sheet from memory --------------
3190              
3191             my $s3 = new_sheet
3192             data_source => "my own data",
3193             rows => [
3194             ["This is a row before the title row" ],
3195             ["Full Name", "Address", "City", "State", "Zip" ],
3196             ["Joe Smith", "123 Main St", "Boston", "CA", "12345"],
3197             ["Mary Jones", "999 Olive Drive", "Fenton", "OH", "67890"],
3198             ],
3199             ;
3200             $s3=>title_rx(1);
3201             ...
3202              
3203             =head1 OO SYNOPSIS
3204              
3205             use Spreadsheet::Edit ();
3206              
3207             my $sheet = Spreadsheet::Edit->new();
3208             $sheet->read_spreadsheet("mailing_list.xls!sheet name");
3209             $sheet->alias( Name => qr/customer/i ); # matches "Customer's Name"
3210              
3211             # Randomly access rows.
3212             # Sheet objects, when used as an ARRAYref, act like \@rows
3213             print "Row 42: Name is ", $sheet->[41]{Name}, "\n";
3214             print "Row 42, Column 3 is ", $sheet->[41][2], "\n";
3215              
3216             # Print the data.
3217             # Sheet objects, when used as an HASHref, act like \%crow
3218             printf "%20s %8s %8s %-13s %s\n", "Name","A/N","Income","Phone","Email";
3219             $sheet->apply( sub{
3220             printf "%20s %8d %8.2f %-13s%s\n",
3221             $sheet->{Name},
3222             $sheet->{"Account Number"},
3223             $sheet->{Income},
3224             $sheet->{Home_phone},
3225             $sheet->{Email} ;
3226             });
3227              
3228             # Another way:
3229             $sheet->apply( sub{
3230             my $r = $sheet->crow();
3231             printf "%20s %8d %8.2f %-13s%s\n",
3232             $r->{Name}, $r->{"Account Number"}, $r->{Income},
3233             $r->{Home_phone}, $r->{Email} ;
3234             });
3235              
3236             # Another way:
3237             $sheet->apply( sub{
3238             my $r = $sheet->crow();
3239             printf "%20s %8d %8.2f %-13s%s\n",
3240             $r->[0], $r->[1], $r->[4], $r->[3], $r->[2] ;
3241             });
3242              
3243             # Split the "Customer's Name" into separate FName and LName columns
3244             $sheet->insert_cols('>Name', "FName", "LName");
3245             $sheet->apply( sub {
3246             my $r = $sheet->crow();
3247             ($r->{FName}, $r->{LName}) = ($r->{Name} =~ /(.*) (.*)/)
3248             or die Spreadsheet::Edit::logmsg("Could not parse Name");
3249             });
3250             $sheet->delete_cols( "Name" );
3251              
3252              
3253             =head1 INTRODUCTION
3254              
3255             =over
3256              
3257              
3258             Skip ahead to "FUNCTIONS" for a list of operations (OO methods are
3259             named the same).
3260              
3261             =back
3262              
3263             Columns may be referenced by title without knowing their absolute positions.
3264             Optionally global (package) variables may be tied to columns.
3265             Tabular data can come
3266             from Spreadsheets, CSV files, or your code, and be written similarly.
3267             Both Functional and Object-Oriented (OO) APIs are provided.
3268              
3269             A table in memory is stored in a C<sheet> object, which contains
3270             an array of rows, each of which is an array of cell values.
3271              
3272             Cells within a row may be accessed by name (e.g. column titles) or
3273             by absolute position (letter code like "A", "B" etc. or zero-based index).
3274              
3275             The usual paradigm is to iterate over rows applying a function
3276             to each, vaguely inspired by 'sed' and 'awk' (see C<apply> below).
3277             Random access is also supported.
3278              
3279             Tied variables can be used with the functional API. These variables
3280             refer to columns in the current row during a C<apply> operation.
3281              
3282             Note: Only cell I<values> are handled; there is no provision
3283             for processing formatting information from spreadsheets.
3284             The author has a notion to add support for formats,
3285             perhaps integrating with Spreadsheet::Read and Spreadsheet::Write
3286             or the packages they use. Please contact the author if you want to help.
3287              
3288             =head3 HOW TO IMPORT
3289              
3290             By default only functions are imported, but most people will
3291              
3292             use Spreadsheet::Edit ':all';
3293              
3294             to import functions and helper variables (see STANDARD SHEET VARIABLES
3295             and VARIABLES USED DURING APPLY).
3296              
3297             You can rename imported items using the '-as' notation shown in
3298             L<Exporter::Tiny::Manual::QuickStart>.
3299              
3300             Purely-OO applications can L<use Spreadsheet::Edit ();>.
3301              
3302             =head1 THE 'CURRENT SHEET'
3303              
3304             The I<Functions> and helper variables implicitly operate on a
3305             package-global "current sheet" object, which can be switched at will.
3306             OO I<Methods> operate on the C<sheet> object they are called on.
3307              
3308             Functions which operates on the "current sheet" have
3309             corresponding OO methods with the same names and arguments
3310             (note that method args must be enclosed by parenthesis).
3311              
3312             =head1 TIED COLUMN VARIABLES
3313              
3314             Package variables can refer directly to columns in the 'current sheet'
3315             during C<apply>. For example C<$Email> and C<$FName> in
3316             the SYNOPSIS above.
3317              
3318             See C<tie_column_vars> for details.
3319              
3320             =head1 THE FUNCTIONS
3321              
3322             In the following, {OPTIONS} refers to an optional first argument
3323             which, if present, is a hashref giving additional parameters.
3324             For example in
3325              
3326             read_spreadsheet {sheetname => 'Sheet1'}, '/path/to/file.xlsx';
3327              
3328             the {...} hash is optional and specifies the sheet name.
3329              
3330             =head2 $curr_sheet = sheet ;
3331              
3332             =head2 $prev_sheet = sheet $another_sheet ;
3333              
3334             =head2 $prev_sheet = sheet undef ;
3335              
3336             [Functional API only]
3337             Retrieve, change, or forget the 'current sheet' object used
3338             by the functional API.
3339              
3340             Changing the current sheet immediately changes what is referenced by
3341             tied column variables and STANDARD SHEET VARIABLES (described later).
3342              
3343             {OPTIONS} may specify C<< package => 'pkgname' >> to operate on the specified
3344             package instead of the caller's package (useful in library code).
3345              
3346             =head2 read_spreadsheet CSVFILEPATH
3347              
3348             =head2 read_spreadsheet SPREADSHEETPATH
3349              
3350             =head2 read_spreadsheet "SPREADSHEETPATH!SHEETNAME"
3351              
3352             Replace any existing data with content from the given file.
3353              
3354             The Functional API will create a new sheet object if
3355             there is no "current sheet".
3356              
3357             The file may be a .csv or any format supported by Libre Office or gnumeric.
3358              
3359             By default column titles are auto-detected and
3360             an exception is thrown if a plausible title row can not be found.
3361              
3362             {OPTIONS} may include:
3363              
3364             Auto-detection options:
3365              
3366             =over 2
3367              
3368             required => COLSPEC or [COLSPEC,...] # any required title(s)
3369             min_rx => NUM, # first rx which may contain the title row.
3370             max_rx => NUM, # maximum rx which may contain the title row.
3371             first_cx => NUM, # first column ix which must contain required titles
3372             last_cx => NUM, # last column ix which must contain required titles
3373              
3374             =back
3375              
3376             The first row is used which includes the C<required> title(s), if any,
3377             and has non-empty titles in all columns, or columns
3378             C<first_cx> through C<last_cx>.
3379              
3380             =over 2
3381              
3382             title_rx => rx # specify title row (first row is 0)
3383             title_rx => undef # no title row
3384              
3385             =back
3386              
3387             These disable auto-detection and explicitly specify the title row
3388             index, or with C<undef>, that there are no titles.
3389             See also the C<title_rx> function/method.
3390              
3391             Other options:
3392              
3393             =over 6
3394              
3395             =item sheetname => SHEETNAME
3396              
3397             Specify which sheet in a multi-sheet workbook (i.e. spreadsheet file) to read.
3398             Alternatively, the sheet name may be appended to the input
3399             path after '!' as shown in the example.
3400              
3401             If no SHEETNAME is given then the sheet which was "active" when the
3402             workbook was saved will be retrieved.
3403              
3404             =item silent => bool
3405              
3406             =item verbose => bool
3407              
3408             =item debug => bool
3409              
3410             Probably what you expect.
3411              
3412             =item Other C<< key => value >> pairs override details of CSV parsing.
3413              
3414             See L<Text::CSV>. UTF-8 encoding is assumed by default.
3415              
3416             =back
3417              
3418             Due to bugs in Libre/Open Office, spreadsheet files can not
3419             be read if LO/OO is currently running, even
3420             for unrelated purposes (see "BUGS").
3421             This problem does not occur with .csv files
3422              
3423             =head2 alias IDENT => COLSPEC, ... ;
3424              
3425             =head2 alias IDENT => qr/regexp/, ... ;
3426              
3427             Create alternate identifiers for specified columns.
3428              
3429             Each IDENT, which must be a valid Perl identifier, will henceforth
3430             refer to the specified column even if the identifier is the same
3431             as the title or letter code of a different column.
3432              
3433             C<$row{IDENT}> and a tied variable C<$IDENT> will refer to the specified column.
3434              
3435             Aliases automatically track the column if it's position changes.
3436              
3437             Regular Expressions are matched against titles only, and an exception is
3438             thrown if more than one title matches.
3439              
3440             Otherwise, COLSPECs may be titles, existing alias names, column letters, etc.
3441             (see "COLUMN SPECIFIERS" for details).
3442              
3443             The COLSPEC is evaluated before the alias is created, so
3444              
3445             alias B => "B";
3446              
3447             would make "B" henceforth refer to the column with title "B", if one exists,
3448             or the second column (treating COLSPEC as a letter-code), even if the column
3449             is later moved.
3450              
3451             RETURNS: The 0-based column indices of the aliased column(s).
3452              
3453             =head2 unalias IDENT, ... ;
3454              
3455             Forget alias(es). Any masked COLSPECs become usable again.
3456              
3457             =head2 tie_column_vars VARNAME, ...
3458              
3459             Create tied package variables (scalars) for use during C<apply>.
3460              
3461             Each variable is a scalar corresponding to a column, and reading or writing
3462             it accesses the corresponding cell in the row being visited during C<apply>.
3463              
3464             The '$' may be omitted in the VARNAME arguments to C<tie_column_vars>;
3465              
3466             You must separately declare these variables with C<our $NAME>,
3467             except in the special case described
3468             in "Use in BEGIN() or module import methods" later.
3469              
3470             The variable name itself implies the column it refers to.
3471              
3472             Variable names may be:
3473              
3474             =over
3475              
3476             =item * User-defined alias names (see "alias")
3477              
3478             =item * Titles which happen to be valid Perl identifiers
3479              
3480             =item * Identifiers derived from titles by replacing offending characters
3481             with underscrores (see "AUTOMATIC ALIASES"),
3482              
3483             =item * Spreadsheet column letters like "A", "B" etc.
3484              
3485             =back
3486              
3487             See "CONFLICT RESOLUTION" for how ambiguity is resolved.
3488              
3489             Multiple calls accumulate, including with different sheets.
3490              
3491             Variable bindings are dynamically evaluated during each access by using the
3492             variable's identifier as a COLSPEC with the 'current sheet' in your package.
3493             This means that it does not matter which sheet
3494             was 'current' when C<tie_column_vars> was called with a particular name;
3495             it only matters that the name of a tied variable is a valid COLSPEC in
3496             the 'current sheet' when that variable is referenced
3497             (otherwise a read returns I<undef> and a write throws an exception).
3498             [*Need clarification*]
3499              
3500             B<{OPTIONS}> may specify:
3501              
3502             =over
3503              
3504             =item package => "pkgname"
3505              
3506             Tie variables in the specified package instead of the caller's package.
3507              
3508             =item verbose => bool
3509              
3510             =item debug => bool
3511              
3512             Print trace messages.
3513              
3514             =back
3515              
3516             =head2 tie_column_vars ':all'
3517              
3518             With the B<:all> token I<all possible variables> are tied, corresponding
3519             to the aliases, titles, non-conflicting column letters etc. which exist
3520             for the current sheet.
3521              
3522             In addition, variables will be tied in the future I<whenever new identifiers
3523             become valid> (for example when a new C<alias> is created, column added,
3524             or another file is read into the same sheet).
3525              
3526             Although convenient this is B<insecure> because malicious
3527             titles could clobber unintended globals.
3528              
3529             If VARNAMES are also specified, those variables will be tied
3530             immediately even if not yet usable; an exception occurs if a tied variable
3531             is referenced before the corresponding alias or title exists.
3532             [*Need clarification* -- exception even for reads??]
3533              
3534             =head2 Use in BEGIN{} or module import methods
3535              
3536             C<tie_column_vars> B<imports> the tied variables into your module,
3537             or the module specified with package => "pkgname" in {OPTIONS}.
3538              
3539             It is unnecessary to declare tied variables if the import
3540             occurs before code is compiled which references the variables. This can
3541             be the case if C<tie_column_vars> is called in a BEGIN{} block or in the
3542             C<import> method of a module loaded with C<use>.
3543              
3544             L<Spreadsheet::Edit::Preload> makes use of this.
3545              
3546             =head2 $rowindex = title_rx ;
3547              
3548             Retrieve the current title row rx, or undef if there are no titles
3549              
3550             =head2 title_rx ROWINDEX ;
3551              
3552             Make the specified row be the title row. Titles in that row are
3553             immediately (re-)examined and the corresponding COLSPECs become valid,
3554             e.g. you can reference a column by it's title or a derived identifier.
3555              
3556             Note: Setting C<title_rx> this way is rarely needed because
3557             by default C<read_spreadsheet> sets the title row.
3558              
3559             =head2 title_rx undef ;
3560              
3561             Titles are disabled and any existing COLSPECs derived from
3562             titles are invalidated.
3563              
3564             =head2 title_rx {AUTODETECT_OPTIONS} 'auto';
3565              
3566             Immediately perform auto-detection of the title row using
3567             C<< {AUTODETECT_OPTIONS} >> to modify any existing auto-detect options
3568             (see C<read_spreadsheet> for a description of the options).
3569              
3570              
3571             =head2 apply {code} [COLSPEC*] ;
3572              
3573             =head2 apply_all {code} [COLSPEC*] ;
3574              
3575             =head2 apply_torx {code} RX-OR-RXLIST [,COLSPEC*] ;
3576              
3577             =head2 apply_exceptrx {code} RX-OR-RXLIST [,COLSPEC*] ;
3578              
3579             Execute the specified code block (or referenced sub) once for each row.
3580              
3581             Note that there is no comma after a bare {code} block.
3582              
3583             While executing your code, tied column variables and
3584             the sheet variables C<@crow>, C<%crow>, C<$rx> and C<$linenum>
3585             and corresponding OO methods will refer to the row being visited.
3586              
3587             If a list of COLSPECs is specified, then
3588              
3589             =over 2
3590              
3591             @_ is bound to the columns in the order specified
3592             $_ is bound to the first such column
3593              
3594             =back
3595              
3596             C<apply> normally visits all rows which follow the title row, or all rows
3597             if there is no title row.
3598             C<first_data_rx> and C<last_data_rx>, if defined, further limit the
3599             range visited.
3600              
3601             C<apply_all> unconditionally visits every row, including any title row.
3602              
3603             C<apply_torx> or C<apply_exceptrx> visit exactly the indicated rows.
3604             RX-OR-RXLIST may be either a single row index or a [list of rx];
3605              
3606             Rows may be safely inserted or deleted during 'apply';
3607             rows inserted after the currently-being-visited row will be visited
3608             at the proper time.
3609              
3610             Nested and recursive C<apply>s are allowed.
3611             When an 'apply' changes the 'current sheet',
3612             tied variables then refer to the other sheet and
3613             any C<apply> active for that sheet.
3614             With nested 'apply's, take care restore the original sheet before returning
3615             (perhaps using C<Guard::scope_guard>).
3616              
3617             B<MAGIC VARIABLES USED DURING APPLY>
3618              
3619             These variables refer to the row currently being visited:
3620              
3621             =over 2
3622              
3623             B<@crow> is an array aliased to the current row's cells.
3624              
3625             B<%crow> is a hash aliased to the same cells,
3626             indexed by alias, title, letter code, etc. (any COLSPEC).
3627              
3628             B<$rx> is the 0-based index of the current row.
3629              
3630             B<$linenum> is the starting line number of the current row if the
3631             data came from a .csv file.
3632              
3633             For example, the "Account Number" column in the SYNOPSIS may be accessed
3634             many ways:
3635              
3636             alias AcNum => "Account Number";
3637             apply {
3638              
3639             $crow{"Account Number"} # %crow indexed by title
3640             $crow{AcNum} # using an explicit alias
3641             $crow{Account_Number} # using the AUTOMATIC ALIAS
3642              
3643             $crow[ $colx{"Account Number"} ]; # @crow indexed by a 0-based index
3644             $crow[ $colx{"AcNum"} ]; # ...obtained from %colx
3645             $crow[ $colx{"Account_Number"} ]; #
3646              
3647             $rows[$rx]->[ $colx{Account_Number} ] # Directly accessing @rows
3648              
3649             # See "TIED COLUMN VARIABLES" for a sweeter alternative
3650             };
3651              
3652             =back
3653              
3654             =head2 delete_col COLSPEC ;
3655              
3656             =head2 delete_cols COLSPEC+ ;
3657              
3658             The indicated columns are removed. Remaining title bindings
3659             are adjusted to track shifted columns.
3660              
3661             =head2 only_cols COLSPEC+ ;
3662              
3663             All columns I<except> the specified columns are deleted.
3664              
3665             =head2 move_col POSITION, SOURCE ;
3666              
3667             =head2 move_cols POSITION, SOURCES... ;
3668              
3669             Relocate the indicated column(s) (C<SOURCES>) so they are adjacent, in
3670             the order specified, starting at the position C<POSITION>.
3671              
3672             POSITION may be ">COLSPEC" to place moved column(s)
3673             immediately after the indicated column (">$" to place at the end),
3674             or POSITION may directly specify the destination column
3675             using an unadorned COLSPEC.
3676              
3677             A non-absolute COLSPEC indicates the I<initial> position of the referenced column.
3678              
3679             =head2 insert_col POSITION, newtitle ;
3680              
3681             =head2 insert_cols POSITION, newtitles... ;
3682              
3683             One or more columns are created starting at a position
3684             specified the same way as in C<move_cols> (later columns
3685             are moved rightward).
3686              
3687             POSITION may be ">$" to place new column(s) at the far right.
3688              
3689             A new title must be specified for each new column.
3690             If there is no title row, specify C<undef> for each position.
3691              
3692             Returns the new column index or indices.
3693              
3694             =head2 split_col {code} COLSPEC, POSITION, newtitles... ;
3695              
3696             New columns are created starting at POSITION as with C<insert_cols>,
3697             and populated with data from column COLSPEC.
3698              
3699             C<{code}> is called for each row with $_ bound to the cell at COLSPEC
3700             and @_ bound to cell(s) in the new column(s). It is up to your code to
3701             read the old column ($_) and write into the new columns (@_).
3702              
3703             The old column is left as-is (not deleted).
3704              
3705             If there is no title row, specify C<undef> for each new title.
3706              
3707             =head2 sort_rows {rx cmp function}
3708              
3709             =head2 sort_rows {rx cmp function} $first_rx, $last_rx
3710              
3711             If no range is specified, then the range is the
3712             same as for C<apply> (namely: All rows after the title row unless
3713             limited by B<first_data_rx> .. B<last_data_rx>).
3714              
3715             In the comparison function, globals $a and $b will contain row objects, which
3716             are dual-typed to act as either an array or hash ref to the cells
3717             in their row. The corresponding original row indicies are also passed
3718             as parameters in C<@_>.
3719              
3720             Rows are not actually moved until after all comparisons have finished.
3721              
3722             RETURNS: A list of the previous row indicies of all rows in the sheet.
3723              
3724             # Sort on the "LName" column using row indicies
3725             # (contrast with the example in SYNOPSIS which uses $a and $b)
3726             sort_rows { my ($rxa, $rxb) = @_;
3727             $rows[$rxa]{LName} cmp $rows[$rxb]{LName}
3728             };
3729              
3730             =head2 rename_cols COLSPEC, "new title", ... ;
3731              
3732             Multiple pairs may be given. Title cell(s) are updated as indicated.
3733              
3734             Existing user-defined aliases are I<not> affected, i.e.,
3735             they continue to refer to the same columns as before even if the titles changed.
3736              
3737             =head2 join_cols_sep STRING COLSPEC+ ;
3738              
3739             =head2 join_cols {code} COLSPEC+ ;
3740              
3741             The specified columns are combined into the first-specified column and the other
3742             columns are deleted.
3743              
3744             The first argument of C<join_cols_sep> should be a fixed separator.
3745             The first argument of C<join_cols> may be a {code} block or subref;
3746              
3747             If a separator string is specified it is used to join column content
3748             together.
3749              
3750             If a {code} block or sub ref is specified,
3751             it is executed once for each row following the title row,
3752             with $_ bound to the first-named column, i.e. the surviving column,
3753             and @_ bound to all named columns in the order given.
3754              
3755             It is up to your code to combine the data by reading
3756             @_ and writing $_ (or, equivalently, by writing $_[0]).
3757              
3758             C<first_data_rx> and C<last_data_rx> are ignored, and the title
3759             is I<not> modified.
3760              
3761             =head2 reverse_cols
3762              
3763             The order of the columns is reversed.
3764              
3765             =head2 insert_row
3766              
3767             =head2 insert_row 'END' [,$count]
3768              
3769             =head2 insert_rows $rowx [,$count]
3770              
3771             Insert one or more empty rows at the indicated position
3772             (default: at end). C<$rowx>, if specified, is either a 0-based offset
3773             for the new row or 'END' to add the new row(s) at the end.
3774             Returns the index of the first new row.
3775              
3776             =head2 delete_rows $rowx,... ;
3777              
3778             =head2 delete_rows 'LAST',... ;
3779              
3780             The indicated data rows are deleted. C<$rowx> is a zero-based row index
3781             or the special token "LAST" to indicate the last row (same as C<$#rows>).
3782             Any number of rows may be deleted in a single command, listed individually.
3783              
3784             =for Pod::Coverage delete_row
3785              
3786             =head2 transpose
3787              
3788             Invert the relation, i.e. rotate and flip the table.
3789             Cells A1,B1,C1 etc. become A1,A2,A3 etc.
3790             Any title_rx is forgotten.
3791              
3792             =head2 $href = read_workbook SPREADSHEETPATH
3793              
3794             **NOT YET IMPLEMENTED**
3795              
3796             [Function only, not callable as a method]
3797             All sheets in the specified document are read into memory
3798             without changing the 'current sheet'. A hashref is returned:
3799              
3800             {
3801             "sheet name" => (Spreadsheet::Edit object),
3802             ...for each sheet in the workbook...
3803             }
3804              
3805             To access one of the workbook sheets, execute
3806              
3807             sheet $href->{"sheet name"}; # or call OO methods on it
3808              
3809             If SPREADSHEETPATH was a .csv file then the resulting hash will have only
3810             one member with an indeterminate key.
3811              
3812             =head2 new_sheet
3813              
3814             [functional API only]
3815             Create a new empty sheet and make it the 'current sheet', returning the
3816             sheet object.
3817              
3818             Rarely used because a new sheet is automatically created by
3819             C<read_spreadsheet> if your package has no current sheet.
3820              
3821             {OPTIONS} may include:
3822              
3823             =over 6
3824              
3825             =item data_source => "text..."
3826              
3827             This string will be returned by the C<data_source> method,
3828             overriding any default.
3829              
3830             =item rows => [[A1_value,B1_value,...], [A2_value,B2_value,...], ...],
3831              
3832             =item linenums => [...] #optional
3833              
3834             This makes the C<sheet> object hold data already in memory.
3835             The data should not be modified directly while the sheet C<object> exists.
3836              
3837             =item clone => $existing_sheet
3838              
3839             A deep copy of an existing sheet is made.
3840              
3841             =item num_cols => $number # with no initial content
3842              
3843             An empty sheet is created but with a fixed number of columns.
3844             When rows are later created they will be immediately padded with empty cells
3845             if necessary to this width.
3846              
3847             =back
3848              
3849              
3850             =head2 write_csv *FILEHANDLE
3851              
3852             =head2 write_csv $path
3853              
3854             Write the current data to the indicated path or open file handle as
3855             a CSV text file.
3856             The default encoding is UTF-8 or, if C<read_spreadsheet> was most-recently
3857             used to read a csv file, the encoding used then.
3858              
3859             {OPTIONS} may include
3860              
3861             =over 6
3862              
3863             =item options for Text::CSV
3864              
3865             Usually none need be specified because we supply sane defaults.
3866              
3867             =item silent => bool
3868              
3869             =item verbose => bool
3870              
3871             =item debug => bool
3872              
3873             =back
3874              
3875             =head2 write_spreadsheet OUTPUTPATH
3876              
3877             Write the current data to a spreadsheet (.ods, .xlsx, etc.) by
3878             first writing to a temporary CSV file and then importing that file into
3879             a new spreadsheet.
3880              
3881             {OPTIONS} may include
3882              
3883             =over 6
3884              
3885             =item col_formats => [ LIST ]
3886              
3887             EXPERIMENTAL, likely to change when Spreadsheet::Read is integrated!
3888              
3889             Elements of LIST may be "" (Standard), "Text", "MM/DD/YY", "DD/MM/YY", or
3890             "YY/MM/DD" to indicate the format of the corresponding column. The meaning
3891             of "Standard" is not well documented but appears to mean "General Number"
3892             in most cases. For details, see "Format Codes" in L<this old Open Office
3893             documentation|https://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options#Filter_Options_for_the_CSV_Filter>.
3894              
3895             =item silent => bool
3896              
3897             =item verbose => bool
3898              
3899             =item debug => bool
3900              
3901             =back
3902              
3903              
3904             =head2 options NAME => EXPR, ... ;
3905              
3906             =head2 options NAME ;
3907              
3908             Set or retrieve miscellaneous sheet-specific options.
3909             When setting, the previous value of
3910             the last option specified is returned. The only options currently defined
3911             are I<silent>, I<verbose> and I<debug>.
3912              
3913             =head2 $hash = attributes ;
3914              
3915             Returns a reference to a hash in which you may store arbitrary data
3916             associated with the sheet object.
3917              
3918             =head2 spectocx COLSPEC or qr/regexp/, ... ;
3919              
3920             Returns the 0-based indicies of the specified colomn(s).
3921             Throws an exception if there is no such column.
3922             A regexp may match multiple columns.
3923             See also C<%colx>.
3924              
3925             =head2 logmsg [FOCUSARG,] string, string, ...
3926              
3927             (must be explicitly imported)
3928              
3929             Concatenate strings, prefixed by a description
3930             of the 'current sheet' and row during C<apply>, if any (or with the
3931             sheet and/or row given by FOCUSARG).
3932              
3933             The resulting string is returned, with "\n" appended if it was not
3934             already terminated by a newline.
3935              
3936             The first argument is used as FOCUSARG if it is
3937             a sheet object, [sheet_object], or [sheet_object, rowindex], and specifies
3938             the sheet and/or row to describe in the message prefix.
3939             Otherwise the first argument is not special and is simply
3940             the first message string.
3941              
3942             The details of formatting the sheet may be customized with a call-back
3943             given by a C<{logmsg_pfx_gen}> attribute. See comments
3944             in the source for how this works.
3945              
3946             =head1 STANDARD SHEET VARIABLES
3947              
3948             These variables magically access the 'current sheet' in your package.
3949              
3950             =over
3951              
3952             =item @rows
3953              
3954             The spreadsheet data as an array of row objects.
3955              
3956             Each row object is "dual-typed" (overloaded) to act as either an ARRAY or HASH
3957             reference to the cells in that row.
3958              
3959             When used as a HASH ref, the key may be a
3960             alias, column title, letter-code etc. (any COLSPEC).
3961             When used as an ARRAY ref, the 0-based index specifies the column.
3962              
3963             =item @linenums
3964              
3965             The first line numbers of corresponding rows (a row can contain
3966             multiple lines if cells contain embedded newlines). Valid only if
3967             the data came from a CSV file.
3968              
3969             =item $num_cols
3970              
3971             The number of columns in the widest input row. Shorter rows are
3972             padded with empty cells when read so that all rows have the same number
3973             of columns in memory.
3974              
3975             =item $title_rx and $title_row
3976              
3977             C<$title_rx> contains the 0-based row index of the title row
3978             and C<$title_row> is an alias for C<$rows[ $title_rx ]>.
3979              
3980             The title row is auto-detected by default.
3981             See C<read_spreadsheet> and C<title_rx> for how to control this.
3982              
3983             If a column title is modified, set C<$title_rx = undef;> to force re-detection.
3984              
3985             =item $first_data_rx and $last_data_rx
3986              
3987             Optional limits on the range of rows visited by C<apply()>
3988             or sorted by C<sort_rows()>. By default $first_data_rx
3989             is the first row following the title row (or 0 if no title row).
3990              
3991             =item %colx (column key => column index)
3992              
3993             C<< %colx >> maps aliases, titles, etc. (all currently-valid COLSPECs)
3994             to the corresponding zero-based column indicies. See "COLSPECS" .
3995              
3996             =item %colx_desc (column key => "debugging info")
3997              
3998             =back
3999              
4000             =head1 COLSPECs (COLUMN SPECIFIERS)
4001              
4002             Arguments which specify columns may be:
4003              
4004             =over
4005              
4006             =item (1) a user-defined alias identifier
4007              
4008             =item (2) an actual "column title" **
4009              
4010             =item (3) an actual title with any leading & trailing spaces removed *
4011              
4012             =item (4) an AUTOMATIC ALIAS identifier *
4013              
4014              
4015             =item (6) a Regexp (qr/.../) which matches an actual title
4016              
4017             =item (7) a numeric column index (0-based)
4018              
4019             =item (8) '^' or '$' (means first or last column, respectively)
4020              
4021              
4022             =back
4023              
4024             *These may only be used if they do not conflict with an
4025             item listed higher up.
4026              
4027             **Titles may be used directly if they can not be confused with
4028             a user-defined alias, the special names '^' or '$' or a numeric
4029             column index. See "CONFLICT RESOLUTION".
4030              
4031             B<AUTOMATIC ALIASES> are Perl I<identifiers> derived from column titles by
4032             first removing leading or trailing spaces, and then
4033             replacing non-word characters with underscores and prepending
4034             an underscore if necessary.
4035             For example:
4036              
4037             Title Automatic Alias
4038              
4039             "Address" Address (no change needed)
4040             " First Name " First_Name
4041             "First & Last" First___Last
4042             "+sizes" _sizes
4043             "1000s" _1000s (underscore avoids leading digit)
4044              
4045             Aliases (both automatic and user-defined) are valid identifiers,
4046             so can be used as the names of tied variables,
4047             bareword keys to C<%colx> and C<%crow>, and related OO interfaces,
4048              
4049             CONFLICT RESOLUTION
4050              
4051             A conflict occurs when a column key potentially refers to multiple
4052             columns. For example, "A", "B" etc. are standard column
4053             names, but they might also be the actual titles of other columns.
4054             Warnings are printed about conflicts unless the B<silent> option
4055             is true (see C<options>).
4056              
4057             =over
4058              
4059             B<User alias identifiers> (defined using C<alias>) are always valid.
4060              
4061             '^' and '$' always refer to the first and last column.
4062              
4063             Numeric "names" 0, 1, etc. always give a 0-based column index
4064             if the value is between 0 and num_cols (i.e. one past the end).
4065              
4066             B<Actual Titles> refer to to their columns, except if they:
4067              
4068             =over
4069              
4070             are the same as a user-defined alias
4071              
4072             are '^' or '$'
4073              
4074             consist only of digits (without leading 0s) corresponding
4075             to a valid column index.
4076              
4077             =back
4078              
4079             B<Automatic Aliases> and B<Standard column names> ("A", "B", etc.)
4080             are available as column keys
4081             unless they conflict with a user-defined alias or an actual title.
4082              
4083             =back
4084              
4085             Note: To unconditionally refer to numeric titles or titles which
4086             look like '^' or '$', use a Regexp B<qr/.../>.
4087             Automatic Aliases can also refer to such titles if there are no conflicts.
4088              
4089             Column positions always refer to the data before a command is
4090             executed. This is relevant for commands which re-number or delete columns.
4091              
4092             =head1 OO DESCRIPTION (OBJECT-ORIENTED INTERFACE)
4093              
4094             All the Functions listed above (except for C<new_sheet>) have
4095             corresponding methods with the same arguments.
4096              
4097             However method arguments must be enclosed in parenthesis
4098             and bare {code} blocks may not be used; a sub{...} ref
4099             should be passed to C<apply> etc.
4100              
4101             =head1 OO-SPECIFIC METHODS
4102              
4103             =head2 Spreadsheet::Edit->new(OPTIONS...)
4104              
4105             Creates a new "sheet" object.
4106              
4107             OPTIONS are the same as described for the C<new_sheet> Function above,
4108             except that they may be specified as key => value pairs of arguments
4109             instead of (or in addition to) an {OPTIONS} hashref.
4110              
4111             =head2 $sheet->rows() ; # Analogous to to \@rows
4112              
4113             =head2 $sheet->linenums() ; # Analogous to \@linenums
4114              
4115             =head2 $sheet->num_cols() ; # Analogous to $num_cols
4116              
4117             =head2 $sheet->colx() ; # Analogous to \%colx
4118              
4119             =head2 $sheet->colx_desc() ; # Analogous to \%colx_desc
4120              
4121             =head2 $sheet->first_data_rx() ; # Analogous to $first_data_rx
4122              
4123             =head2 $sheet->last_data_rx() ; # Analogous to $last_data_rx
4124              
4125             =head2 $sheet->title_rx() ; # Analogous to to $title_rx
4126              
4127             =head2 $sheet->title_row() ; # Analogous to $title_row
4128              
4129             =head2 $sheet->rx() ; # Current rx in apply, analogous to to $rx
4130              
4131             =head2 $sheet->title_rx(rxvalue) ; # Analogous to assigning to $title_rx (changes title row)
4132              
4133             =head2 $sheet->crow(); # Current row in apply (a dual-typed row object)
4134              
4135             =head2 $sheet->linenum() ; # Analogous to to $linenum
4136              
4137             =head2 $sheet->get(rx,ident) ; # Analogous to to $rows[rx]{ident}
4138              
4139             =head2 $sheet->set(rx,ident,value); # Analogous to to $rows[rx]{ident} = value
4140              
4141             =head2 $sheet->data_source(); # Returns "description of sheet" (e.g. path read)
4142              
4143             =head2 $sheet->sheetname(); # valid if input was a spreadsheet, else undef
4144              
4145             =head2
4146              
4147              
4148             =head1 SEE ALSO
4149              
4150             L<Spreadsheet::Edit::Preload>
4151              
4152             =head1 BUGS
4153              
4154             Some vestigial support for formats remains from an earlier implementation,
4155             but this support is likely to be entirely replaced at some point.
4156              
4157             Reading a spreadsheet (but not csv) may fail if I<Libre Office>
4158             or I<Open Office> are currently running for any purpose; this seems to be
4159             a bug or limitation where batch-mode operations share the same profile as
4160             interactive sessions. In any case, I<ssconvert> (gnumeric) will be used
4161             if it is installed, and does not have this limitation.
4162             In the future Spreadsheet::Read might be used instead of external programs,
4163             although it uses Twig and is quite a bit slower.
4164              
4165             =head1 THREAD SAFETY
4166              
4167             Unknown, and probably not worth the trouble to find out.
4168              
4169             =head1 FUTURE IDEAS
4170              
4171             =over 4
4172              
4173             =item Add format-processing support.
4174              
4175             =item Add "column-major" views.
4176              
4177             This would allow accessing a whole column as an array.
4178             Perhaps C<@cols> and C<%cols> would be sets of column arrays
4179             (@cols indexed by column index, %cols indexed by any COLSPEC).
4180             And C<tie_column_vars '@NAME'> would tie user array variables to columns.
4181              
4182             =back
4183              
4184             =head1 AUTHOR
4185              
4186             Jim Avera (jim.avera at gmail)
4187              
4188             =head1 LICENSE
4189              
4190             Public Domain or CC0.
4191              
4192             =for Pod::Coverage meta_info
4193              
4194             =for Pod::Coverage iolayers input_encoding
4195              
4196             =for Pod::Coverage oops btw fmt_sheet fmt_list
4197              
4198             =for Pod::Coverage to_aref to_array to_wanted to_hash
4199              
4200             =for Pod::Coverage tied_varnames title2ident let2cx cx2let
4201              
4202             =cut
4203