File Coverage

blib/lib/Statistics/Data.pm
Criterion Covered Total %
statement 10 12 83.3
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 14 16 87.5


line stmt bran cond sub pod time code
1             package Statistics::Data;
2 8     8   119182 use strict;
  8         12  
  8         227  
3 8     8   27 use warnings FATAL => 'all';
  8         12  
  8         303  
4 8     8   30 use Carp qw(carp croak);
  8         10  
  8         522  
5 0           use List::AllUtils qw(all first)
6 8     8   6200 ; # i.e., single method 'all', not ':all' methods
  0            
7             use Number::Misc qw(is_even);
8             use Scalar::Util qw(looks_like_number);
9             use String::Util qw(hascontent nocontent);
10             our $VERSION = '0.11';
11            
12             =head1 NAME
13            
14             Statistics::Data - Load, access, update one or more data lists for statistical analysis
15            
16             =head1 VERSION
17            
18             This is documentation for B of Statistics/Data.pm, released Jan 2017.
19            
20             =head1 SYNOPSIS
21            
22             use Statistics::Data 0.11;
23             my $dat = Statistics::Data->new();
24            
25             # with named arrays:
26             $dat->load({'aname' => \@data1, 'anothername' => \@data2}); # names are arbitrary
27             $aref = $dat->access(name => 'aname'); # gets back a copy of @data1
28             $dat->add('aname' => [2, 3]); # pushes new values onto loaded copy of @data1
29             $dat->dump_list(); # print to check if both arrays are loaded and their number of elements
30             $dat->unload(name => 'anothername'); # only 'aname' data remains loaded
31             $aref = $dat->access(name => 'aname'); # $aref is a reference to a copy of @data1
32             $href = $dat->get_hoa(); # get all data
33             $dat->dump_vals(name => 'aname', delim => ','); # proof in print it's back
34            
35             # with multiple anonymous arrays:
36             $dat->load(\@data1, \@data2); # any number of anonymous arrays
37             $dat->add([2], [6]); # pushes a single value apiece onto copies of @data1 and @data2
38             $aref = $dat->access(index => 1); # returns reference to copy of @data2, with its new values
39             $dat->unload(index => 0); # only @data2 remains loaded, and its index is now 0
40            
41             =head1 DESCRIPTION
42            
43             Handles data for some other statistics modules, as in loading, updating and retrieving data for analysis. Performs no actual statistical analysis itself.
44            
45             Rationale is not wanting to write the same or similar load, add, etc. methods for every statistics module, not to provide an omnibus API for Perl stat modules. It, however, encompasses much of the variety of how Perl stats modules do the basic handling their data. Used for L (and its sub-tests).
46            
47             =head1 SUBROUTINES/METHODS
48            
49             Manages caches of one or more lists of data for use by some other statistics modules. The lists are ordered arrays comprised of literal scalars (numbers, strings). They can be loaded, added to (updated), accessed or unloaded by referring to the index (order) in which they have been loaded (or previously added to), or by a particular name. The lists are cached within the class object's '_DATA' aref as an aref itself, optionally associated with a 'name'. The particular structures supported here to load, update, retrieve, unload data are specified under L. Any module that uses this one as its base can still use its own rules to select the appropriate list, or provide the appropriate list within the call to itself.
50            
51             =head2 Constructors
52            
53             =head3 new
54            
55             $dat = Statistics::Data->new();
56            
57             Returns a new Statistics::Data object.
58            
59             =cut
60            
61             sub new {
62             my $class = shift;
63             my $self = bless {}, ref($class) ? ref($class) : $class;
64             $self->{_DATA} = [];
65             return $self;
66             }
67            
68             =head3 clone
69            
70             $new_self = $dat->clone();
71            
72             I: B
73            
74             Returns a copy of the class object with its data loaded (if any). This is not a copy of any particular data but the whole blessed hash; nothing modified in this new object affects the original.
75            
76             =cut
77            
78             sub clone {
79             my $self = shift;
80             require Clone;
81             return Clone::clone($self);
82             }
83             *copy = \*clone;
84            
85             =head2 Setting data
86            
87             Methods to cache and uncache data into the data-object.
88            
89             =head3 load
90            
91             $dat->load(ARRAY); # CASE 1 - can be updated/retrieved anonymously, or as index => i (load order)
92             $dat->load(AREF); # CASE 2 - same, as aref
93             $dat->load(STRING => AREF); # CASE 3 - updated/retrieved as name => 'data' (arbitrary name); or by index (order)
94             $dat->load({ STRING => AREF }) # CASE 4 - same as CASE 4, as hashref
95             $dat->load(STRING => AREF, STRING => AREF); # CASE 5 - same as CASE 3 but with multiple named loads
96             $dat->load({ STRING => AREF, STRING => AREF }); # CASE 6 - same as CASE 5 bu as hashref
97             $dat->load(AREF, AREF); # CASE 7 - same as CASE 2 but with multiple aref loads
98            
99             # Not supported:
100             #$dat->load(STRING => ARRAY); # not OK - use CASE 3 instead
101             #$dat->load([AREF, AREF]); # not OK - use CASE 7 instead
102             #$dat->load([ [STRING => AREF], [STRING => AREF] ]); # not OK - use CASE 5 or CASE 6 instead
103             #$dat->load(STRING => AREF, STRING => [AREF, AREF]); # not OK - too mixed to make sense
104            
105             I: B
106            
107             Cache a list of data as an array-reference. Each call removes previous loads, as does sending nothing. If data need to be cached without unloading previous loads, use the L method instead. Arguments with the following structures are acceptable as data, and will be Lible by either index or name as expected:
108            
109             =over 4
110            
111             =item load ARRAY
112            
113             Load an anonymous array that has no named values. For example:
114            
115             $dat->load(1, 4, 7);
116             $dat->load(@ari);
117            
118             This is loaded as a single flat list, with an undefined name, and indexed as 0. Note that trying to load a named dataset with an unreferenced array is wrong - the name will be "folded" into the array itself.
119            
120             =item load AREF
121            
122             Load a reference to a single anonymous array that has no named values, e.g.:
123            
124             $dat->load([1, 4, 7]);
125             $dat->load(\@ari);
126            
127             This is loaded as a single flat list, with an undefined name, and indexed as 0.
128            
129             =item load ARRAY of AREF(s)
130            
131             Same as above, but note that more than one unnamed array-reference can also be loaded at once, e.g.:
132            
133             $dat->load([1, 4, 7], [2, 5, 9]);
134             $dat->load(\@ari1, \@ari2);
135            
136             Each array can be accessed, using L, by specifying B => index, the latter value representing the order in which these arrays were loaded.
137            
138             =item load HASH of AREF(s)
139            
140             Load one or more named references to arrays, e.g.:
141            
142             $dat->load('dist1' => [1, 4, 7]);
143             $dat->load('dist1' => [1, 4, 7], 'dist2' => [2, 5, 9]);
144            
145             This loads the array(s) with a name attribute, so that when calling L, they can be retrieved by name, e.g., passing B => 'dist1'. The load method involves a check that there is an even number of arguments, and that, if this really is a hash, all the keys are defined and not empty, and all the values are in fact array-references.
146            
147             =item load HASHREF of AREF(s)
148            
149             As above, but where the hash is referenced, e.g.:
150            
151             $dat->load({'dist1' => [1, 4, 7], 'dist2' => [2, 5, 9]});
152            
153             =back
154            
155             This means that using the following forms--including a referenced array of referenced arrays--will produce unexpected results, if they do not actually croak, and so should not be used:
156            
157             $dat->load(data => @data); # no croak but wrong - puts "data" in @data - use \@data
158             $dat->load([\@blue_data, \@red_data]); # use unreferenced ARRAY of AREFs instead
159             $dat->load([ [blues => \@blue_data], [reds => \@red_data] ]); # treated as single AREF; use HASH of AREFs instead
160             $dat->load(blues => \@blue_data, reds => [\@red_data1, \@red_data2]); # mixed structures not supported
161            
162             A warning is I thrown if any of the given arrays actually contain no data; could cause too many warnings if multiple analyses on different datasets are run.
163            
164             =cut
165            
166             sub load
167             { # load single aref: cannot load more than one array; keeps a direct reference to the data: any edits creep back.
168             my ( $self, @args ) = @_;
169             $self->unload();
170             $self->add(@args);
171             return 1;
172             }
173             *load_data = \&load;
174            
175             =head3 add
176            
177             I: B, B, B
178            
179             Same usage as above for L. Just push any value(s) or so along, or loads an entirely named list, without clobbering what's already in there (as L would). If data have not been loaded with a name, then appending data to them happens according to the order of array-refs set here; e.g., $dat->add([], \new_data) adds nothing to the first loaded list, and initialises a second array, if none already, or appends the new data to it.
180            
181             =cut
182            
183             sub add {
184             my ( $self, @args ) = @_;
185             my $href = _init_data( $self, @args ); # href of array(s) keyed by index
186             while ( my ( $i, $val ) = each %{$href} ) {
187             if ( defined $val->{'name'} ) { # new named data
188             $self->{_DATA}->[$i] =
189             { aref => $val->{'aref'}, name => $val->{'name'} };
190             }
191             else { # new data, anonymous, indexed only
192             push @{ $self->{_DATA}->[$i]->{'aref'} }, @{ $val->{'aref'} };
193             }
194             }
195             return;
196             }
197             *add_data = \&add;
198             *update = \&add;
199            
200             =head3 unload
201            
202             $dat->unload(); # deletes all cached data, named or not
203             $dat->unload(index => POSINT); # deletes the aref named 'data' whatever
204             $dat->unload(name => STRING); # deletes the aref named 'data' whatever
205            
206             Empty, clear, clobber what's in there. Does nothing if given index or name that does not refer to any loaded data. This should be used whenever any already loaded or added data are no longer required ahead of another L, including via L or L.
207            
208             =cut
209            
210             sub unload {
211             my ( $self, @args ) = @_;
212             if ( !$args[0] ) {
213             $self->{_DATA} = [];
214             }
215             else {
216             my $i = _get_aref_index_by_args( $self, @args );
217             if ( defined $i ) {
218             splice @{ $self->{_DATA} }, $i, 1;
219             }
220             }
221             return;
222             }
223            
224             =head3 share
225            
226             $dat_new->share($dat_old);
227            
228             Adds all the data from one Statistics::Data object to another. Changes in the new copies do not affect the originals.
229            
230             =cut
231            
232             sub share {
233             my ( $self, $other ) = @_;
234             _add_from_object_aref( $self, $other->{_DATA} );
235             return 1;
236             }
237            
238             =head2 Getting data
239            
240             =head3 get_aref
241            
242             $aref = $dat->get_aref(name => STRING);
243             $aref = $dat->get_aref();
244            
245             Returns a reference to a single, previously loaded hashref of arrayed of data, as specified in the named argument B. The array is empty if no data have been loaded, or if there is none with the given B. If B is not defined, and there is only one loaded array, a reference to that array is returned; otherwise croaks.
246            
247             =cut
248            
249             sub get_aref {
250             my ( $self, %args ) = @_;
251             my $name_aref = _get_argument_name_or_names( \%args );
252             my $data_aref = [];
253             if ( nocontent($name_aref)) {
254             if ($self->ndata() == 1 ) {
255             $data_aref = $self->{_DATA}->[0]->{'aref'};
256             }
257             else {
258             croak 'Data to get need to be named';
259             }
260             }
261             else {
262             my $i = _get_aref_index_by_name( $self, $name_aref->[0] );
263            
264             # is name loaded with data? ($i only defined if the name matched data already loaded)
265             if ( defined $i ) {
266             $data_aref = $self->{_DATA}->[$i]->{'aref'};
267             }
268             }
269             return $data_aref;
270             }
271             *get_aref_by_lab = \&get_aref;
272            
273             =head3 get_aoa
274            
275             $aref_of_arefs = $dat->get_aoa(name => AREF);
276             $aref_of_arefs = $dat->get_aoa(); # all loaded data
277            
278             Returns a reference to an array where each value is itself an array of data, as separately loaded under a different name or anonymously, in the order that they were loaded. If no B value is defined, all the loaded data are returned as a list of arefs.
279            
280             =cut
281            
282             sub get_aoa {
283             my ( $self, %args ) = @_;
284             my $name_aref = _get_argument_name_or_names( \%args );
285             my @data = ();
286             if ( !ref $name_aref ) { # get all data
287             for my $i ( 0 .. $self->ndata() - 1 ) {
288             $data[$i] = $self->{_DATA}->[$i]->{'aref'};
289             }
290             }
291             else { # get named data
292             for my $i ( 0 .. scalar @{$name_aref} - 1 ) { # assume ref eq 'ARRAY'
293             my $j = _get_aref_index_by_name( $self, $name_aref->[$i] )
294             ; # is name loaded with data?
295             if ( defined $j ) {
296             $data[$i] = $self->{_DATA}->[$j]->{'aref'};
297             } # else ignore the given name
298             }
299             }
300             return wantarray ? @data : \@data; # unreferenced for chance legacy for now
301             }
302             *get_aoa_by_lab = \&get_aoa;
303            
304             =head3 get_hoa
305            
306             $href = $data->get_hoa(name => AREF); # 1 or more named data
307             $href = $data->get_hoa(); # all named data
308             %hash = $data->get_hoa(); # same but unreferenced
309            
310             Returns a hash or hashref of arefs, where the keys are the names of the loaded data, and the values are arefs of their associated data.
311            
312             By default, all of the loaded data are returned in the (reference to a) hash. The optional argument B is used to return one or more specific data-arrays in the hashref, given a referenced array of their names. Names that have never been used are ignored, and an empty hash (ref) is returned if all names are unknown, or there are no loaded data.
313            
314             =cut
315            
316             sub get_hoa {
317             my ( $self, %args ) = @_;
318             my $name_aref = _get_argument_name_or_names( \%args );
319             my %data = ();
320             if ( !ref $name_aref ) { # get all data
321             for my $i ( 0 .. $self->ndata() - 1 ) {
322             if ( hascontent( $self->{_DATA}->[$i]->{'name'} ) ) {
323             $data{ $self->{_DATA}->[$i]->{'name'} } =
324             $self->{_DATA}->[$i]->{'aref'};
325             }
326             }
327             }
328             else { # get named data
329             for my $i ( 0 .. scalar @{$name_aref} - 1 ) { # assume ref eq 'ARRAY'
330             my $j = _get_aref_index_by_name( $self, $name_aref->[$i] )
331             ; # is name loaded with data?
332             if ( defined $j ) {
333             $data{ $name_aref->[$i] } = $self->{_DATA}->[$j]->{'aref'};
334             } # else ignore the given name
335             }
336             }
337             return wantarray ? %data : \%data;
338             }
339             *get_hoa_by_lab = \&get_hoa;
340            
341             =head3 get_hoa_numonly_indep
342            
343             $hoa = $dat->get_hoa_numonly_indep(name => AREF);
344             $hoa = $dat->get_hoa_numonly_indep();
345            
346             Same as L but each array is culled of any empty or non-numeric values as independent variables, with culls in one array not creating a cull on any other.
347            
348             =cut
349            
350             sub get_hoa_numonly_indep {
351             my ( $self, %args ) = @_;
352             return _cull_hoa_indep( scalar $self->get_hoa(%args), \$self->{'purged'} );
353             }
354             *get_hoa_by_lab_numonly_indep = \&get_hoa_numonly_indep;
355            
356             =head3 get_hoa_numonly_across
357            
358             $hoa = $dat->get_hoa_numonly_across(); # same as get_hoa but each list culled of NaNs at same i across lists
359            
360             Returns hashref of previously loaded variable data (as arefs) culled of an empty or non-numerical values whereby a valid value in one list is culled if it is at an index that is invalid in another list. This is the type of data useful for a dependent ANOVA.
361            
362             =cut
363            
364             sub get_hoa_numonly_across {
365             my ( $self, %args ) = @_;
366             return _cull_hoa_across( scalar $self->get_hoa(%args), \$self->{'purged'} );
367             }
368             *get_hoa_by_lab_numonly_across = \&get_hoa_numonly_across;
369            
370             =head3 access
371            
372             $aref = $dat->access(); #returns the first and/or only array loaded, if any
373             $aref = $dat->access(index => INT); #returns the ith array loaded
374             $aref = $dat->access(name => STRING); # returns a particular named cache of data
375            
376             Returns an aref given its B for order of being "Led" to the loaded data, or by explicit B (as by L). Default returned is the first loaded data, which is reliable if there is only one loaded array.
377            
378             =cut
379            
380             sub access {
381             my ( $self, @args ) = @_;
382             return $self->{_DATA}->[_get_aref_index_by_args( $self, @args )]->{'aref'};
383             }
384             *read = \&access; # legacy only
385            
386             =head3 ndata
387            
388             $n = $dat->ndata();
389            
390             Returns the number of loaded arrays.
391            
392             =cut
393            
394             sub ndata {
395             my $self = shift;
396             return scalar( @{ $self->{'_DATA'} } );
397             }
398            
399             =head3 names
400            
401             $aref = $dat->names();
402            
403             Returns a reference to an array of all the datanames, if any.
404            
405             =cut
406            
407             sub names {
408             my $self = shift;
409             my @names = ();
410             for ( 0 .. scalar @{ $self->{'_DATA'} } - 1 ) {
411             if ( hascontent( $self->{'_DATA'}->[$_]->{'name'} ) ) {
412             push @names, $self->{'_DATA'}->[$_]->{'name'};
413             }
414             }
415             return \@names;
416             }
417             *labels = \&names;
418            
419             =head2 Checking data
420            
421             =head3 all_full
422            
423             $bool = $dat->all_full(AREF); # test data are valid before loading them
424             $bool = $dat->all_full(name => STRING); # checking after loading/adding the data (or key in 'index')
425            
426             Checks not only if the data array, as named or indexed, exists, but if it is non-empty: has no empty elements, with any elements that might exist in there being checked with L.
427            
428             =cut
429            
430             sub all_full {
431             my ( $self, @args ) = @_;
432             my $data = ref $args[0] ? shift @args : $self->access(@args);
433             my ( $bool, @vals ) = ();
434             for ( @{$data} ) {
435             $bool = nocontent($_) ? 0 : 1;
436             if (wantarray) {
437             if ($bool) {
438             push @vals, $_;
439             }
440             }
441             else {
442             last if $bool == 0;
443             }
444             }
445             return wantarray ? ( \@vals, $bool ) : $bool;
446             }
447            
448             =head3 all_numeric
449            
450             $bool = $dat->all_numeric(); # test data first-loaded, if any
451             $bool = $dat->all_numeric(AREF); # test these data are valid before loading them
452             $bool = $dat->all_numeric(name => STRING); # check specific data after loading/adding them by a 'name' or by their 'index' order
453             ($aref, $bool) = $dat->all_numeric([3, '', 4.7, undef, 'b']); # returns ([3, 4.7], 0); - same for any loaded data
454            
455             Given an aref of data, or reference to data previously loaded (see L), tests numeracy of each element, and return, if called in scalar context, a boolean scalar indicating if all data in this aref are defined and not empty (using C in L), and, if they have content, if these are all numerical, using C in L. Alternatively, if called in list context, returns the data (as an aref) less any values that failed this test, followed by the boolean. If the requested data do not exist, returns undef.
456            
457             =cut
458            
459             sub all_numeric {
460             my ( $self, @args ) = @_;
461             my ( $data, $bool, @vals ) = ();
462             if ( ref $args[0] eq 'ARRAY' ) {
463             $data = shift @args;
464             }
465             else {
466             my $i = _get_aref_index_by_args( $self, @args );
467             if ( defined $i ) {
468             $data = $self->{_DATA}->[$i]->{'aref'};
469             }
470             }
471             if ( ref $data ) {
472             for ( @{$data} ) {
473             $bool = _nan($_) ? 0 : 1;
474             if (wantarray) {
475             if ($bool) {
476             push @vals, $_;
477             }
478             }
479             else {
480             last if $bool == 0;
481             }
482             $data = \@vals;
483             }
484             return ( wantarray and $data )
485             ? ( $data, $bool )
486             : $bool
487             ; # just bool even if wantarray when there is no array to return (so bool is null)
488             }
489             else {
490             return;
491             }
492            
493             }
494             *all_numerical = \&all_numeric;
495            
496             =head3 all_proportions
497            
498             $bool = $dat->all_proportions(AREF); # test data are valid before loading them
499             $bool = $dat->all_proportions(name => STRING); # checking after loading/adding the data (or key in 'index')
500            
501             Ensure data are all proportions. Sometimes, the data a module needs are all proportions, ranging from 0 to 1 inclusive. A dataset might have to be cleaned
502            
503             =cut
504            
505             sub all_proportions {
506             my ( $self, @args ) = @_;
507             my $data = ref $args[0] ? shift @args : $self->access(@args);
508             my ( $bool, @vals ) = ();
509             for ( @{$data} ) {
510             if ( nocontent($_) ) {
511             $bool = 0;
512             }
513             elsif ( looks_like_number($_) ) {
514             $bool = ( $_ < 0 || $_ > 1 ) ? 0 : 1;
515             }
516             if (wantarray) {
517             if ($bool) {
518             push @vals, $_;
519             }
520             }
521             else {
522             last if $bool == 0;
523             }
524             }
525             return wantarray ? ( \@vals, $bool ) : $bool;
526             }
527            
528             =head3 all_counts
529            
530             $bool = $dat->all_counts(AREF); # test data are valid before loading them
531             $bool = $dat->all_counts(name => STRING); # checking after loading/adding the data (or key in 'index')
532             ($aref, $bool) = $dat->all_counts(AREF);
533            
534             Returns true if all values in given data are real positive integers or zero, as well as satisfying "hascontent" and "looks_like_number" methods; false otherwise. Called in list context, returns aref of data culled of any values that are false on this basis, and then the boolean. For example, [2.2, 3, 4] and [-1, 3, 4] both fail, but [1, 3, 4] is true. Integer test is simply if $v == int($v).
535            
536             =cut
537            
538             sub all_counts {
539             my ( $self, @args ) = @_;
540             my $data = ref $args[0] ? shift @args : $self->access(@args);
541             my ( $bool, @vals ) = ();
542             for ( @{$data} ) {
543             if ( nocontent($_) ) {
544             $bool = 0;
545             }
546             elsif ( looks_like_number($_) ) {
547             $bool = $_ >= 0 && $_ == int $_ ? 1 : 0;
548             }
549             else {
550             $bool = 0;
551             }
552             if (wantarray) {
553             if ($bool) {
554             push @vals, $_;
555             }
556             }
557             else {
558             last if $bool == 0;
559             }
560             }
561             return wantarray ? ( \@vals, $bool ) : $bool;
562             }
563            
564             =head3 all_pos
565            
566             $bool = $dat->all_pos(AREF); # test data are valid before loading them
567             $bool = $dat->all_pos(name => STRING); # checking after loading/adding the data (or key in 'index')
568             ($aref, $bool) = $dat->all_pos(AREF);
569            
570             Returns true if all values in given data are greater than zero, as well as "hascontent" and "looks_like_number"; false otherwise. Called in list context, returns aref of data culled of any values that are false on this basis, and then the boolean.
571            
572             =cut
573            
574             sub all_pos {
575             my ( $self, @args ) = @_;
576             my $data = ref $args[0] ? shift @args : $self->access(@args);
577             my ( $bool, @vals ) = ();
578             for ( @{$data} ) {
579             if ( nocontent($_) ) {
580             $bool = 0;
581             }
582             elsif ( looks_like_number($_) ) {
583             $bool = $_ > 0 ? 1 : 0;
584             }
585             if (wantarray) {
586             if ($bool) {
587             push @vals, $_;
588             }
589             }
590             else {
591             last if $bool == 0;
592             }
593             }
594             return wantarray ? ( \@vals, $bool ) : $bool;
595             }
596            
597             =head3 equal_n
598            
599             $num = $dat->equal_n(); # number of vals in each loaded data if equal; else 0
600             $num = $dat->equal_n(name => AREF); # names of loaded data to check
601            
602             If the named loaded data all have the same number of elements, then that number is returned; otherwise 0.
603            
604             =cut
605            
606             sub equal_n {
607             my ( $self, %args ) = @_;
608            
609             # supports specific "data" as a name for legacy - to be culled
610             my $data =
611             $args{'data'} ? delete $args{'data'} : $self->get_hoa(%args);
612             my @data = values %{$data};
613             my $n = scalar @{ $data[0] };
614             for ( 1 .. scalar @data - 1 ) {
615             my $count = scalar @{ $data[$_] };
616             if ( $count != $n ) {
617             $n = 0;
618             last;
619             }
620             else {
621             $n = $count;
622             }
623             }
624             return $n;
625             }
626            
627             =head2 Dumping data
628            
629             =head3 dump_vals
630            
631             $dat->dump_vals(delim => ", "); # assumes the first (only?) loaded array should be dumped
632             $dat->dump_vals(index => INT, delim => ", "); # dump the i'th loaded array
633             $dat->dump_vals(name => STRING, delim => ", "); # dump the array loaded/added with the given "name"
634            
635             Prints to STDOUT a space-separated line (ending with "\n") of a loaded/added data's elements. Optionally, give a value for B to specify how the elements in each array should be separated; default is a single space.
636            
637             =cut
638            
639             sub dump_vals {
640             my ( $self, @args ) = @_;
641             my $args = ref $args[0] ? $args[0] : {@args};
642             my $delim = $args->{'delim'} || q{ };
643             print {*STDOUT} join( $delim, @{ $self->access($args) } ), "\n"
644             or croak 'Could not print line to STDOUT';
645             return 1;
646             }
647            
648             =head3 dump_list
649            
650             Dumps a list (using L) of the data currently loaded, without showing their actual elements. List is firstly by index, then by name (if any), then gives the number of elements in the associated array.
651            
652             =cut
653            
654             sub dump_list {
655             my $self = shift;
656             my ( $lim, $name, $N, $len_name, $len_n, $tbl, @rows, @maxlens ) = ();
657             $lim = $self->ndata();
658             my $default = 5;
659             @maxlens = ( ( $lim > $default ? $lim : $default ), $default, 1 );
660             for my $i ( 0 .. $lim - 1 ) {
661             $name =
662             defined $self->{_DATA}->[$i]->{'name'}
663             ? $self->{_DATA}->[$i]->{'name'}
664             : q{-};
665             $N = scalar @{ $self->{_DATA}->[$i]->{'aref'} };
666             $len_name = length $name;
667             $len_n = length $N;
668             if ( $len_name > $maxlens[1] ) {
669             $maxlens[1] = $len_name;
670             }
671             if ( $len_n > $maxlens[2] ) {
672             $maxlens[2] = $len_n;
673             }
674             $rows[$i] = [ $i, $name, $N ];
675             }
676             require Text::SimpleTable;
677             $tbl = Text::SimpleTable->new(
678             [ $maxlens[0], 'index' ],
679             [ $maxlens[1], 'name' ],
680             [ $maxlens[2], 'N' ]
681             );
682             for (@rows) {
683             $tbl->row( @{$_} );
684             }
685             print {*STDOUT} $tbl->draw or croak 'Could not print list of loaded data';
686             return 1;
687             }
688            
689             # PRIVATE METHODS:
690            
691             sub _cull_hoa_indep {
692             my ( $hoa, $purged_n ) = @_;
693             my ( $purged, %purged_data ) = 0;
694             for my $name ( keys %{$hoa} ) {
695             my @clean = ();
696             for my $i ( 0 .. scalar( @{ $hoa->{$name} } ) - 1 ) {
697             if ( _nan( $hoa->{$name}->[$i] ) ) {
698             $purged++;
699             }
700             else {
701             push @clean, $hoa->{$name}->[$i];
702             }
703             }
704             $purged_data{$name} = [@clean];
705             }
706             ${$purged_n} = $purged;
707             return wantarray ? %purged_data : \%purged_data;
708             }
709            
710             sub _cull_hoa_across {
711             my ( $hoa, $purged_n ) = @_;
712            
713             # List all indices in all lists with invalid values;
714             # and copy each group of data for cleaning:
715             my $invalid_i_by_name = _href_of_idx_with_nans_per_name($hoa);
716             my ( %clean, %invalid_idx ) = ();
717             for my $name ( keys %{$hoa} ) {
718             $clean{$name} = $hoa->{$name};
719             while ( my ( $idx, $val ) = each %{ $invalid_i_by_name->{$name} } ) {
720             $invalid_idx{$idx} += $val;
721             }
722             }
723            
724             ${$purged_n} = ( scalar keys %invalid_idx ) || 0;
725            
726             # Purge by index (from highest to lowest):
727             for my $idx ( reverse sort { $a <=> $b } keys %invalid_idx ) {
728             for my $name ( keys %clean ) {
729             if ( $idx < scalar @{ $clean{$name} } ) {
730             splice @{ $clean{$name} }, $idx, 1;
731             }
732             }
733             }
734             return wantarray ? %clean : \%clean;
735             }
736            
737             sub _init_data {
738             my ( $self, @args ) = @_;
739            
740             my $data = {};
741             if ( _isa_hashref_of_arefs( $args[0] ) ) { # cases 4 & 6
742             $data = _init_named_data( $self, $args[0] );
743             }
744             elsif ( _isa_hash_of_arefs(@args) ) { # cases 3 & 5
745             $data = _init_named_data( $self, {@args} );
746             }
747             elsif ( _isa_array_of_arefs(@args) ) { # cases 2 & 7
748             $data = _init_unnamed_data(@args);
749             }
750             else { # assume @args is just a list of strings - case 1
751             if ( ref $args[0] ) {
752             croak
753             'Don\'t know how to load/add the given data: Need to be in the structure of HOA (referenced or not), or an unreferenced AOA';
754             }
755             else {
756             $data->{0} = { aref => [@args], name => undef };
757             }
758             }
759             return $data;
760             }
761            
762             sub _isa_hashref_of_arefs {
763             my $arg = shift;
764             if ( not ref $arg or ref $arg ne 'HASH' ) {
765             return 0;
766             }
767             else {
768             return _isa_hash_of_arefs( %{$arg} );
769             }
770             }
771            
772             sub _isa_hash_of_arefs {
773            
774             # determines that:
775             # - scalar @args passes Number::Misc is_even, then that:
776             # - every odd indexed value 'hascontent' via String::Util
777             # - every even indexed value is aref
778             my @args = @_;
779             my $bool = 0;
780             if ( is_even( scalar @args ) )
781             { # Number::Misc method - not odd number in assignment
782             my %args = @args; # so assume is hash
783             HASH_CHECK:
784             while ( my ( $name, $val ) = each %args ) {
785             if ( hascontent($name) && ref $val eq 'ARRAY' ) {
786             $bool = 1;
787             }
788             else {
789             $bool = 0;
790             }
791             last HASH_CHECK if $bool == 0;
792             }
793             }
794             else {
795             $bool = 0;
796             }
797             return $bool;
798             }
799            
800             sub _isa_array_of_arefs {
801             my @args = @_;
802             if ( all { ref $_ eq 'ARRAY' } @args ) {
803             return 1;
804             }
805             else {
806             return 0;
807             }
808             }
809            
810             sub _init_named_data {
811             my ( $self, $href ) = @_;
812             my ( $i, %data ) = ( scalar @{ $self->{_DATA} } );
813             while ( my ( $name, $aref ) = each %{$href} ) {
814             my $j = _get_aref_index_by_name( $self, $name );
815             if ( defined $j )
816             { # already a name for these data, so don't need to define it for this init
817             $data{$j} = { aref => [ @{$aref} ], name => undef };
818             }
819             else { # no aref named $name yet: define for aref and name
820             $data{ $i++ } = { aref => [ @{$aref} ], name => $name };
821             }
822             }
823             return \%data;
824             }
825            
826             sub _init_unnamed_data {
827             my @args = @_;
828             my %data = ();
829             for my $i ( 0 .. scalar @args - 1 ) {
830             $data{$i} = { aref => [ @{ $args[$i] } ], name => undef };
831             }
832             return \%data;
833             }
834            
835             sub _get_aref_index_by_args {
836             my ( $self, @args ) = @_;
837             my $i = 0; # get first by default
838             my $args = ref $args[0] ? $args[0] : {@args};
839             if ( looks_like_number( $args->{'index'} ) and ref $self->{_DATA}->[$args->{'index'}] ) {
840             $i = $args->{'index'};
841             }
842             else {
843             my $name = _name_or_label($args);
844             if ( hascontent($name) ) {
845             $i = _get_aref_index_by_name( $self, $name );
846             }
847             }
848             return $i;
849             }
850            
851             sub _get_aref_index_by_name {
852             my ( $self, $name ) = @_;
853             my ( $i, $found ) = ( 0, 0 );
854             for ( ; $i < scalar @{ $self->{_DATA} } ; $i++ ) {
855             if ( $self->{_DATA}->[$i]->{'name'}
856             and $self->{_DATA}->[$i]->{'name'} eq $name )
857             {
858             $found = 1;
859             last;
860             }
861             }
862             return $found ? $i : undef;
863             }
864            
865             sub _add_from_object_aref {
866             my ( $self, $aref ) = @_;
867             for my $dat ( @{$aref} ) {
868             if ( hascontent( $dat->{'name'} ) ) {
869             $self->add( $dat->{'name'} => $dat->{'aref'} );
870             }
871             else {
872             $self->add( $dat->{'aref'} );
873             }
874             }
875             return 1;
876             }
877            
878             sub _href_of_idx_with_nans_per_name {
879             my $hoa = shift;
880             my %invalid_i_by_name = ();
881             for my $name ( keys %{$hoa} ) {
882             for my $i ( 0 .. scalar( @{ $hoa->{$name} } ) - 1 ) {
883             if ( _nan( $hoa->{$name}->[$i] ) ) {
884             $invalid_i_by_name{$name}->{$i} = 1;
885             }
886             }
887             }
888             return \%invalid_i_by_name;
889             }
890            
891             # Return AREF of names given as an aref or single string as value to optional argument:
892             sub _get_argument_name_or_names {
893             my $href = shift;
894             my $var = _name_or_label($href);
895             return hascontent($var) ? ref $var ? $var : [$var] : q{};
896             }
897            
898             sub _name_or_label {
899             my $href = shift;
900             my $str = first { $href->{$_} } qw/lab label name/;
901             return $str ? $href->{$str} : q{};
902             }
903            
904             sub _nan {
905             return !looks_like_number(shift) ? 1 : 0;
906             }
907            
908             ## Deprecated/obsolete methods:
909             sub load_from_file {
910             croak __PACKAGE__
911             . ': load_from_file() method is obsolete from v.11; read-in and save data by your own methods';
912             }
913            
914             sub save_to_file {
915             croak __PACKAGE__
916             . ': load_from_file() method is obsolete from v.11; read-in and save data by your own methods';
917             }
918            
919             =head1 DIAGNOSTICS
920            
921             =over 4
922            
923             =item Don't know how to load/add the given data
924            
925             Croaked when attempting to load or add data with an unsupported data structure where the first argument is a reference. See the examples under L for valid (and invalid) ways of sending data to them.
926            
927             =item Data for accessing need to be loaded
928            
929             Croaked when calling L, or any methods that use it internally -- viz., L and the validity checks L -- when it is called with a name for data that have not been loaded, or did not load successfully.
930            
931             =item Data for unloading need to be loaded
932            
933             Croaked when calling L with an index or a name attribute and the data these refer to have not been loaded, or did not load successfully.
934            
935             =item Data to get need to be named
936            
937             Croaked when calling L and no name is specified for the aref to get, and there is more than one loaded aref to choose from.
938            
939             =back
940            
941             =head1 DEPENDENCIES
942            
943             L - used for its C method when testing loads
944            
945             L - used for its C method when testing loads
946            
947             L - used for its C and C methods
948            
949             L - required for L
950            
951             L - required for L
952            
953             =head1 BUGS AND LIMITATIONS
954            
955             Please report any bugs or feature requests to C, or through the web interface at L. This will notify the author, and then you'll automatically be notified of progress on your bug as any changes are made.
956            
957             =head1 SUPPORT
958            
959             You can find documentation for this module with the perldoc command.
960            
961             perldoc Statistics::Data
962            
963             You can also look for information at:
964            
965             =over 4
966            
967             =item * RT: CPAN's request tracker
968            
969             L
970            
971             =item * AnnoCPAN: Annotated CPAN documentation
972            
973             L
974            
975             =item * CPAN Ratings
976            
977             L
978            
979             =item * Search CPAN
980            
981             L
982            
983             =back
984            
985             =head1 AUTHOR
986            
987             Roderick Garton, C<< >>
988            
989             =head1 LICENSE AND COPYRIGHT
990            
991             Copyright 2009-2017 Roderick Garton
992            
993             This program is free software; you can redistribute it and/or modify it under the terms of either: the GNU General Public License as published
994             by the Free Software Foundation; or the Artistic License. See L for more information.
995            
996             =cut
997            
998             1; # End of Statistics::Data