File Coverage

blib/lib/Statistics/Data.pm
Criterion Covered Total %
statement 278 354 78.5
branch 123 180 68.3
condition 15 25 60.0
subroutine 40 48 83.3
pod 23 25 92.0
total 479 632 75.7


line stmt bran cond sub pod time code
1             package Statistics::Data;
2 7     7   121262 use strict;
  7         15  
  7         232  
3 7     7   27 use warnings FATAL => 'all';
  7         10  
  7         305  
4 7     7   27 use Carp qw(carp croak);
  7         9  
  7         496  
5 7         605 use List::AllUtils qw(all first)
6 7     7   4398 ; # i.e., single method 'all', not ':all' methods
  7         96313  
7 7     7   3228 use Number::Misc qw(is_even);
  7         6565  
  7         379  
8 7     7   34 use Scalar::Util qw(looks_like_number);
  7         10  
  7         513  
9 7     7   3514 use String::Util qw(hascontent nocontent);
  7         35026  
  7         22391  
10             our $VERSION = '0.10';
11            
12             =head1 NAME
13            
14             Statistics::Data - Load, access, update one or more data lists for statistical analysis
15            
16             =head1 VERSION
17            
18             This is documentation for B of Statistics/Data.pm, released Jan 2017.
19            
20             =head1 SYNOPSIS
21            
22             use Statistics::Data 0.10;
23             my $dat = Statistics::Data->new();
24            
25             # managing labelled arrays:
26             $dat->load({'aname' => \@data1, 'anothername' => \@data2}); # labels are arbitrary
27             $aref = $dat->access(label => 'aname'); # gets back a copy of @data1
28             $dat->add(aname => [2, 3]); # pushes new values onto loaded copy of @data1
29             $dat->dump_list(); # print to check if both arrays are loaded and their number of elements
30             $dat->unload(label => 'anothername'); # only 'aname' data remains loaded
31             $aref = $dat->access(label => 'aname'); # $aref is a reference to a copy of @data1
32             $dat->dump_vals(label => 'aname', delim => ','); # proof in print it's back
33            
34             # managing multiple anonymous arrays:
35             $dat->load(\@data1, \@data2); # any number of anonymous arrays
36             $dat->add([2], [6]); # pushes a single value apiece onto copies of @data1 and @data2
37             $aref = $dat->access(index => 1); # returns reference to copy of @data2, with its new values
38             $dat->unload(index => 0); # only @data2 remains loaded, and its index is now 0
39            
40             =head1 DESCRIPTION
41            
42             Handles data for some other statistics modules, as in loading, updating and retrieving data for analysis. Performs no actual statistical analysis itself.
43            
44             Rationale is not wanting to write the same or similar load, add, etc. methods for every statistics module, not to provide an omnibus API for Perl stat modules. It, however, encompasses much of the variety of how Perl stats modules do the basic handling their data. Used for L (and its sub-tests).
45            
46             =head1 SUBROUTINES/METHODS
47            
48             Manages caches of one or more lists of data for use by some other statistics modules. The lists are ordered arrays comprised of literal scalars (numbers, strings). They can be loaded, added to (updated), accessed or unloaded by referring to the index (order) in which they have been loaded (or previously added to), or by a particular label. The lists are cached within the class object's '_DATA' aref as an aref itself, optionally associated with a 'label'. The particular structures supported here to load, update, retrieve, unload data are specified under L. Any module that uses this one as its base can still use its own rules to select the appropriate list, or provide the appropriate list within the call to itself.
49            
50             =head2 Constructors
51            
52             =head3 new
53            
54             $dat = Statistics::Data->new();
55            
56             Returns a new Statistics::Data object.
57            
58             =cut
59            
60             sub new {
61 7     7 1 82 my $class = shift;
62 7 50       30 my $self = bless {}, ref($class) ? ref($class) : $class;
63 7         41 $self->{_DATA} = [];
64 7         21 return $self;
65             }
66            
67             =head3 clone
68            
69             $new_self = $dat->clone();
70            
71             I: B
72            
73             Returns a copy of the class object with its data loaded (if any). Note this is not a copy of any particular data but the whole blessed hash. Alternatively, use L to get all the data added to a new object, or use L to load/add particular arrays of data into another object. Nothing modified in this new object affects the original.
74            
75             =cut
76            
77             sub clone {
78 0     0 1 0 my $self = shift;
79 0         0 require Clone;
80 0         0 return Clone::clone($self);
81             }
82             *copy = \*clone;
83            
84             =head2 Setting data
85            
86             Methods to cache and uncache data into the data-object.
87            
88             =head3 load
89            
90             $dat->load(ARRAY); # CASE 1 - can be updated/retrieved anonymously, or as index => i (load order)
91             $dat->load(AREF); # CASE 2 - same, as aref
92             $dat->load(STRING => AREF); # CASE 3 - updated/retrieved as label => 'data' (arbitrary name); or by index (order)
93             $dat->load({ STRING => AREF }) # CASE 4 - same as CASE 4, as hashref
94             $dat->load(STRING => AREF, STRING => AREF); # CASE 5 - same as CASE 3 but with multiple named loads
95             $dat->load({ STRING => AREF, STRING => AREF }); # CASE 6 - same as CASE 5 bu as hashref
96             $dat->load(AREF, AREF); # CASE 7 - same as CASE 2 but with multiple aref loads
97            
98             # Not supported:
99             #$dat->load(STRING => ARRAY); # not OK - use CASE 3 instead
100             #$dat->load([AREF, AREF]); # not OK - use CASE 7 instead
101             #$dat->load([ [STRING => AREF], [STRING => AREF] ]); # not OK - use CASE 5 or CASE 6 instead
102             #$dat->load(STRING => AREF, STRING => [AREF, AREF]); # not OK - too mixed to make sense
103            
104             I: B
105            
106             Cache a list of data as an array-reference. Each call removes previous loads, as does sending nothing. If data need to be cached without unloading previous loads, use the L method instead. Arguments with the following structures are acceptable as data, and will be Lible by either index or label as expected:
107            
108             =over 4
109            
110             =item load ARRAY
111            
112             Load an anonymous array that has no named values. For example:
113            
114             $dat->load(1, 4, 7);
115             $dat->load(@ari);
116            
117             This is loaded as a single flat list, with an undefined label, and indexed as 0. Note that trying to load a labelled dataset with an unreferenced array is wrong - the label will be "folded" into the sequence itself.
118            
119             =item load AREF
120            
121             Load a reference to a single anonymous array that has no named values, e.g.:
122            
123             $dat->load([1, 4, 7]);
124             $dat->load(\@ari);
125            
126             This is loaded as a single flat list, with an undefined label, and indexed as 0.
127            
128             =item load ARRAY of AREF(s)
129            
130             Same as above, but note that more than one unlabelled array-reference can also be loaded at once, e.g.:
131            
132             $dat->load([1, 4, 7], [2, 5, 9]);
133             $dat->load(\@ari1, \@ari2);
134            
135             Each array can be accessed, using L, by specifying B => index, the latter value representing the order in which these arrays were loaded.
136            
137             =item load HASH of AREF(s)
138            
139             Load one or more labelled references to arrays, e.g.:
140            
141             $dat->load('dist1' => [1, 4, 7]);
142             $dat->load('dist1' => [1, 4, 7], 'dist2' => [2, 5, 9]);
143            
144             This loads the array(s) with a label attribute, so that when calling L, they can be retrieved by name, e.g., passing B
145            
146             =item load HASHREF of AREF(s)
147            
148             As above, but where the hash is referenced, e.g.:
149            
150             $dat->load({'dist1' => [1, 4, 7], 'dist2' => [2, 5, 9]});
151            
152             =back
153            
154             This means that using the following forms--including a referenced array of referenced arrays--will produce unexpected results, if they do not actually croak, and so should not be used:
155            
156             $dat->load(data => @data); # no croak but wrong - puts "data" in @data - use \@data
157             $dat->load([\@blue_data, \@red_data]); # use unreferenced ARRAY of AREFs instead
158             $dat->load([ [blues => \@blue_data], [reds => \@red_data] ]); # treated as single AREF; use HASH of AREFs instead
159             $dat->load(blues => \@blue_data, reds => [\@red_data1, \@red_data2]); # mixed structures not supported
160            
161             A warning is I thrown if any of the given arrays actually contain no data. This could be sefully thrown; a child module might depend on there actually being data to statistically analyse (why not?) but only throw an error late in the process about it, and then perhaps ambiguously. But this could cause too many warnings if multiple analyses on different datasets are being programmatically run.
162            
163             =cut
164            
165             sub load
166             { # load single aref: cannot load more than one array; keeps a direct reference to the data: any edits creep back.
167 22     22 1 17749 my ( $self, @args ) = @_;
168 22         68 $self->unload();
169 22         64 $self->add(@args);
170 22         42 return 1;
171             }
172             *load_data = \&load;
173            
174             =head3 add
175            
176             I: B, B, B
177            
178             Same usage as above for L. Just push any value(s) or so along, or loads an entirely labelled list, without clobbering what's already in there (as L would). If data have not been loaded with a label, then appending data to them happens according to the order of array-refs set here, see L could even skip adding something to one previously loaded list by, e.g., going $dat->add([], \new_data) - adding nothing to the first loaded list, and initialising a second array, if none already, or appending these data to it.
179            
180             =cut
181            
182             sub add {
183 34     34 1 5841 my ( $self, @args ) = @_;
184 34         78 my $tmp = _init_data( $self, @args )
185             ; # hashref of data array(s) keyed by index to use for loading or adding
186 34         33 while ( my ( $i, $val ) = each %{$tmp} ) {
  78         193  
187 44 100       72 if ( defined $val->{'lab'} ) { # newly labelled data
188             $self->{_DATA}->[$i] =
189 27         97 { seq => $val->{'seq'}, lab => $val->{'lab'} };
190             }
191             else
192             { # data to be added to existing cache, or an anonymous load, indexed only
193 17         16 push @{ $self->{_DATA}->[$i]->{'seq'} }, @{ $val->{'seq'} };
  17         51  
  17         42  
194             }
195             }
196 34         88 return;
197             }
198             *add_data = \&add;
199             *append_data = \&add;
200             *update = \&add;
201            
202             =head3 unload
203            
204             $dat->unload(); # deletes all cached data, named or not
205             $dat->unload(index => POSINT); # deletes the aref named 'data' whatever
206             $dat->unload(label => STRING); # deletes the aref named 'data' whatever
207            
208             Empty, clear, clobber what's in there. Does nothing if given index or label that does not refer to any loaded data. This should be used whenever any already loaded or added data are no longer required ahead of another L, including via L or L.
209            
210             =cut
211            
212             sub unload {
213 24     24 1 440 my ( $self, @args ) = @_;
214 24 100       53 if ( !$args[0] ) {
215 23         52 $self->{_DATA} = [];
216             }
217             else {
218 1         3 my $i = _index_by_args( $self, @args );
219 1 50       3 if ( defined $i ) {
220 1         1 splice @{ $self->{_DATA} }, $i, 1;
  1         3  
221             }
222             }
223 24         56 return;
224             }
225            
226             =head3 share
227            
228             $dat_new->share($dat_old);
229            
230             Adds all the data from one Statistics::Data object to another. Changes in the new copies do not affect the originals.
231            
232             =cut
233            
234             sub share {
235 1     1 1 5 my ( $self, $other ) = @_;
236 1         4 _add_from_object_aref( $self, $other->{_DATA} );
237 1         2 return 1;
238             }
239            
240             =head2 Getting data
241            
242             To retrieve what has been previously loaded, simply call L, specifying the "label" or "index" that was used to load/add the data - i.e., when loaded as a hashref or an arrayref, respectively; specifying the list by B
243            
244             For retrieving more than one previously loaded dataset, use one of the "get" methods, choosing between getting back a hash- or an array-ref, or to get back a single list, as by L, after all. These "get" methods only support retrieving data loaded as hashrefs; use L to get back index-specific loads.
245            
246             =head3 access
247            
248             $aref = $dat->access(); #returns the first and/or only array loaded, if any
249             $aref = $dat->access(index => INT); #returns the ith array loaded
250             $aref = $dat->access(label => STRING); # returns a particular named cache of data
251            
252             I: B
253            
254             Returns one referenced array being previously loaded/added to data by the given B (in a flat-list load) or B
255            
256             =cut
257            
258             sub access {
259 24     24 1 5198 my ( $self, @args ) = @_;
260 24         23 my $val;
261 24         43 my $i = _index_by_args( $self, @args );
262 24 50       48 if ( defined $i ) {
263 24         34 $val = $self->{_DATA}->[$i]->{'seq'};
264             }
265 24         42 return $val;
266             }
267             *read = \&access; # legacy only
268            
269             =head3 get_hoa, get_hoa_by_lab
270            
271             $href = $data->get_hoa(label => AREF_of_STRINGS); # retrieve 1 or more named data
272             $href = $data->get_hoa(); # retrieve all named data
273            
274             Returns a hashref of arefs, where the keys are the names of the data, as previously given in a load, and the values are arefs of the list of data that has been loaded for that name.
275            
276             The optional argument B
277            
278             This is useful in a module like L that needs to continuously cross-refer to multiple variables to make a single calculation while also being able to distinguish them by some meaningful key other than simply an index number.
279            
280             For working with numerical data in particular, see the following two methods.
281            
282             =cut
283            
284             sub get_hoa_by_lab {
285 8     8 1 1781 my ( $self, %args ) = @_;
286 8         18 my $name_aref = _get_given_names( \%args );
287 8         54 my %data = ();
288 8 100       11 if ( !ref $name_aref ) { # get all data
289 1         4 for my $i ( 0 .. $self->ndata() - 1 ) {
290 2 50       4 if ( hascontent( $self->{_DATA}->[$i]->{'lab'} ) ) {
291             $data{ $self->{_DATA}->[$i]->{'lab'} } =
292 2         14 $self->{_DATA}->[$i]->{'seq'};
293             }
294             }
295             }
296             else { # get named data
297 7         5 for my $i ( 0 .. scalar @{$name_aref} - 1 ) { # assume ref eq 'ARRAY'
  7         18  
298 11         14 my $j = _seq_index_by_label( $self, $name_aref->[$i] )
299             ; # is name loaded with data?
300 11 100       16 if ( defined $j ) {
301 9         14 $data{ $name_aref->[$i] } = $self->{_DATA}->[$j]->{'seq'};
302             } # else ignore the given name
303             }
304             }
305 8 100       31 return wantarray ? %data : \%data;
306             }
307             *get_hoa = \&get_hoa_by_lab;
308            
309             =head3 get_hoa_by_lab_numonly_indep
310            
311             $hoa = $dat->get_hoa_by_lab_numonly_indep(label => AREF);
312             $hoa = $dat->get_hoa_by_lab_numonly_indep();
313            
314             Returns the variables given in the argument B
315            
316             =cut
317            
318             sub get_hoa_by_lab_numonly_indep {
319 1     1 1 149 my ( $self, %args ) = @_;
320             return _cull_hoa_indep( scalar $self->get_hoa_by_lab(%args),
321 1         11 \$self->{'purged'} );
322             }
323            
324             =head3 get_hoa_by_lab_numonly_across
325            
326             $hoa = $dat->get_hoa_by_lab_numonly_across(); # same as get_hoa but each list culled of NaNs at same i across lists
327            
328             Returns hashref of previously loaded variable data (as arefs) culled of an empty or non-numerical values whereby even a valid value in one list is culled if it is at an index that is invalid in another list. This is the type of data useful for a dependent ANOVA.
329            
330             =cut
331            
332             sub get_hoa_by_lab_numonly_across {
333 2     2 1 302 my ( $self, %args ) = @_;
334             return _cull_hoa_across( scalar $self->get_hoa_by_lab(%args),
335 2         6 \$self->{'purged'} );
336             }
337            
338             =head3 get_aoa, get_aoa_by_lab
339            
340             $aref_of_arefs = $dat->get_aoa_by_lab(label => AREF);
341             $aref_of_arefs = $dat->get_aoa_by_lab(); # all loaded data
342            
343             Returns a reference to an array where each value is itself an array of data, as separately loaded under a different name or anonymously, in the order that they were loaded. If no B
344            
345             =cut
346            
347             sub get_aoa_by_lab {
348 5     5 1 1622 my ( $self, %args ) = @_;
349 5         15 my $name_aref = _get_given_names( \%args );
350 5         36 my @data = ();
351 5 100       8 if ( !ref $name_aref ) { # get all data
352 1         3 for my $i ( 0 .. $self->ndata() - 1 ) {
353 2         4 $data[$i] = $self->{_DATA}->[$i]->{'seq'};
354             }
355             }
356             else { # get named data
357 4         4 for my $i ( 0 .. scalar @{$name_aref} - 1 ) { # assume ref eq 'ARRAY'
  4         9  
358 4         6 my $j = _seq_index_by_label( $self, $name_aref->[$i] )
359             ; # is name loaded with data?
360 4 100       6 if ( defined $j ) {
361 3         7 $data[$i] = $self->{_DATA}->[$j]->{'seq'};
362             } # else ignore the given name
363             }
364             }
365 5 50       16 return wantarray ? @data : \@data; # unreferenced for chance legacy for now
366             }
367             *get_aoa = \&get_aoa_by_lab;
368            
369             # Return AREF of names given as an optional argument:
370            
371             #sub _get_given_names {
372             # my $href = shift;
373             # return hascontent( $href->{'lab'} ) ? ref $href->{'lab'} ? $href->{'lab'} : [ $href->{'lab'} ] : q{};
374            
375             #}
376            
377             # Return AREF of names given as an aref or single string as value to optional argument:
378             sub _get_given_names {
379 13     13   9 my $href = shift;
380 13         18 my $var = _name_or_label($href);
381 13 100       25 return hascontent($var) ? ref $var ? $var : [$var] : q{};
    100          
382             }
383            
384             sub _name_or_label {
385 13     13   11 my $href = shift;
386 13     17   54 my $str = first { $href->{$_} } qw/lab label name/;
  17         24  
387 13 100       39 return $str ? $href->{$str} : q{};
388             }
389            
390             =head3 get_aref_by_lab
391            
392             $aref = $dat->get_aref_by_lab(label => STRING);
393             $aref = $dat->get_aref_by_lab();
394            
395             Returns a reference to a single, previously loaded hashref of arrayed of data, as specified in the named argument B
396            
397             =cut
398            
399             sub get_aref_by_lab {
400 0     0 1 0 my ( $self, %args ) = @_;
401 0         0 my $name_aref = _get_given_names( \%args );
402 0         0 my $data_aref = [];
403 0 0 0     0 if ( nocontent($name_aref) && ref $self->{_DATA}->[-1]->{'seq'} ) {
404 0         0 $data_aref = $self->{_DATA}->[-1]->{'seq'};
405             }
406             else {
407 0         0 my $i = _seq_index_by_label( $self, $name_aref );
408            
409             # is name loaded with data? ($i only defined if the name matched data already loaded)
410 0 0       0 if ( defined $i ) {
411 0         0 $data_aref = $self->{_DATA}->[$i]->{'seq'};
412             }
413             }
414 0         0 return $data_aref;
415             }
416            
417             =head3 ndata
418            
419             $n = $dat->ndata();
420            
421             Returns the number of loaded variables.
422            
423             =cut
424            
425             sub ndata {
426 8     8 1 1362 my $self = shift;
427 8         11 return scalar( @{ $self->{'_DATA'} } );
  8         32  
428             }
429            
430             =head3 labels
431            
432             $aref = $dat->labels();
433            
434             Returns a reference to an array of all the datanames (labels), if any.
435            
436             =cut
437            
438             sub labels {
439 1     1 1 9 my $self = shift;
440 1         2 my @names = ();
441 1         3 for ( 0 .. scalar @{ $self->{'_DATA'} } - 1 ) {
  1         7  
442 2 50       8 if ( hascontent( $self->{'_DATA'}->[$_]->{'lab'} ) ) {
443 2         28 push @names, $self->{'_DATA'}->[$_]->{'lab'};
444             }
445             }
446 1         4 return \@names;
447             }
448            
449             =head2 Checking data
450            
451             =head3 all_full
452            
453             $bool = $dat->all_full(AREF); # test data are valid before loading them
454             $bool = $dat->all_full(label => STRING); # checking after loading/adding the data (or key in 'index')
455            
456             Checks not only if the data array, as named or indexed, exists, but if it is non-empty: has no empty elements, with any elements that might exist in there being checked with L.
457            
458             =cut
459            
460             sub all_full {
461 3     3 1 3479 my ( $self, @args ) = @_;
462 3 50       10 my $data = ref $args[0] ? shift @args : $self->access(@args);
463 3         6 my ( $bool, @vals ) = ();
464 3         3 for ( @{$data} ) {
  3         7  
465 24 100       37 $bool = nocontent($_) ? 0 : 1;
466 24 100       153 if (wantarray) {
467 5 100       8 if ($bool) {
468 3         4 push @vals, $_;
469             }
470             }
471             else {
472 19 100       35 last if $bool == 0;
473             }
474             }
475 3 100       11 return wantarray ? ( \@vals, $bool ) : $bool;
476             }
477            
478             =head3 all_numeric
479            
480             $bool = $dat->all_numeric(); # test data first-loaded, if any
481             $bool = $dat->all_numeric(AREF); # test these data are valid before loading them
482             $bool = $dat->all_numeric(label => STRING); # check specific data after loading/adding them by a 'label' or by their 'index' order
483             ($aref, $bool) = $dat->all_numeric([3, '', 4.7, undef, 'b']); # returns ([3, 4.7], 0); - same for any loaded data
484            
485             Given an aref of data, or reference to data previously loaded (see L), tests numeracy of each element, and return, if called in scalar context, a boolean scalar indicating if all data in this aref are defined and not empty (using C in L), and, if they have content, if these are all numerical, using C in L. Alternatively, if called in list context, returns the data (as an aref) less any values that failed this test, followed by the boolean. If the requested data do not exist, returns undef.
486            
487             =cut
488            
489             sub all_numeric {
490 7     7 1 2789 my ( $self, @args ) = @_;
491 7         12 my ( $data, $bool, @vals ) = ();
492 7 100       19 if ( ref $args[0] eq 'ARRAY' ) {
493 3         8 $data = shift @args;
494             }
495             else {
496 4         7 my $i = _index_by_args( $self, @args );
497 4 50       17 if ( defined $i ) {
498 4         5 $data = $self->{_DATA}->[$i]->{'seq'};
499             }
500             }
501 7 50       18 if ( ref $data ) {
502 7         10 for ( @{$data} ) {
  7         17  
503 58 100       62 $bool = _nan($_) ? 0 : 1;
504 58 100       62 if (wantarray) {
505 5 100       8 if ($bool) {
506 2         3 push @vals, $_;
507             }
508             }
509             else {
510 53 100       82 last if $bool == 0;
511             }
512 53         52 $data = \@vals;
513             }
514 7 100 66     39 return ( wantarray and $data )
515             ? ( $data, $bool )
516             : $bool
517             ; # just bool even if wantarray when there is no array to return (so bool is null)
518             }
519             else {
520 0         0 return;
521             }
522            
523             }
524             *all_numerical = \&all_numeric;
525            
526             =head3 all_proportions
527            
528             $bool = $dat->all_proportions(AREF); # test data are valid before loading them
529             $bool = $dat->all_proportions(label => STRING); # checking after loading/adding the data (or key in 'index')
530            
531             Ensure data are all proportions. Sometimes, the data a module needs are all proportions, ranging from 0 to 1 inclusive. A dataset might have to be cleaned
532            
533             =cut
534            
535             sub all_proportions {
536 5     5 1 551 my ( $self, @args ) = @_;
537 5 100       19 my $data = ref $args[0] ? shift @args : $self->access(@args);
538 5         10 my ( $bool, @vals ) = ();
539 5         7 for ( @{$data} ) {
  5         12  
540 15 100       25 if ( nocontent($_) ) {
    100          
541 3         16 $bool = 0;
542             }
543             elsif ( looks_like_number($_) ) {
544 11 100 66     208 $bool = ( $_ < 0 || $_ > 1 ) ? 0 : 1;
545             }
546 15 100       35 if (wantarray) {
547 5 100       10 if ($bool) {
548 1         2 push @vals, $_;
549             }
550             }
551             else {
552 10 100       23 last if $bool == 0;
553             }
554             }
555 5 100       23 return wantarray ? ( \@vals, $bool ) : $bool;
556             }
557            
558             =head3 all_counts
559            
560             $bool = $dat->all_counts(AREF); # test data are valid before loading them
561             $bool = $dat->all_counts(label => STRING); # checking after loading/adding the data (or key in 'index')
562             ($aref, $bool) = $dat->all_counts(AREF);
563            
564             Returns true if all values in given data are real positive integers or zero, as well as satisfying "hascontent" and "looks_like_number" methods; false otherwise. Called in list context, returns aref of data culled of any values that are false on this basis, and then the boolean. For example, [2.2, 3, 4] and [-1, 3, 4] both fail, but [1, 3, 4] is true. Integer test is simply if $v == int($v).
565            
566             =cut
567            
568             sub all_counts {
569 3     3 1 13 my ( $self, @args ) = @_;
570 3 50       12 my $data = ref $args[0] ? shift @args : $self->access(@args);
571 3         7 my ( $bool, @vals ) = ();
572 3         4 for ( @{$data} ) {
  3         6  
573 5 50       10 if ( nocontent($_) ) {
    50          
574 0         0 $bool = 0;
575             }
576             elsif ( looks_like_number($_) ) {
577 5 100 100     60 $bool = $_ >= 0 && $_ == int $_ ? 1 : 0;
578             }
579             else {
580 0         0 $bool = 0;
581             }
582 5 50       15 if (wantarray) {
583 0 0       0 if ($bool) {
584 0         0 push @vals, $_;
585             }
586             }
587             else {
588 5 100       12 last if $bool == 0;
589             }
590             }
591 3 50       10 return wantarray ? ( \@vals, $bool ) : $bool;
592             }
593            
594             =head3 all_pos
595            
596             $bool = $dat->all_pos(AREF); # test data are valid before loading them
597             $bool = $dat->all_pos(label => STRING); # checking after loading/adding the data (or key in 'index')
598             ($aref, $bool) = $dat->all_pos(AREF);
599            
600             Returns true if all values in given data are greater than zero, as well as "hascontent" and "looks_like_number"; false otherwise. Called in list context, returns aref of data culled of any values that are false on this basis, and then the boolean.
601            
602             =cut
603            
604             sub all_pos {
605 2     2 1 12 my ( $self, @args ) = @_;
606 2 50       8 my $data = ref $args[0] ? shift @args : $self->access(@args);
607 2         11 my ( $bool, @vals ) = ();
608 2         4 for ( @{$data} ) {
  2         7  
609 5 50       10 if ( nocontent($_) ) {
    50          
610 0         0 $bool = 0;
611             }
612             elsif ( looks_like_number($_) ) {
613 5 100       48 $bool = $_ > 0 ? 1 : 0;
614             }
615 5 50       8 if (wantarray) {
616 0 0       0 if ($bool) {
617 0         0 push @vals, $_;
618             }
619             }
620             else {
621 5 100       16 last if $bool == 0;
622             }
623             }
624 2 50       7 return wantarray ? ( \@vals, $bool ) : $bool;
625             }
626            
627             =head3 equal_n
628            
629             $num = $dat->equal_n(AREF); # test data are valid before loading them
630             $num = $dat->equal_n(label => STRING); # checking after loading/adding the data (or key in 'index')
631            
632             If the given data or aref of variable names all have the same number of elements, then that number is returned; otherwise 0.
633            
634             =cut
635            
636             sub equal_n {
637 0     0 1 0 my ( $self, %args ) = @_;
638             my $data =
639 0 0       0 $args{'data'} ? delete $args{'data'} : $self->get_hoa_by_lab(%args);
640 0         0 my $n = scalar @{ $data->[0] };
  0         0  
641 0 0       0 return $n if scalar @{$data} == 1;
  0         0  
642 0         0 for ( 1 .. scalar @{$data} - 1 ) {
  0         0  
643 0 0       0 if ( $n != scalar @{ $data->[$_] } ) {
  0         0  
644 0         0 $n = 0;
645 0         0 last;
646             }
647             }
648 0         0 return $n;
649             }
650            
651             =head3 idx_anumeric
652            
653             $aref = $dat->idx_anumeric(AREF); # test data are valid before loading them
654             $aref = $dat->idx_anumeric(label => STRING); # checking after loading/adding the data (or key in 'index')
655            
656             Given an aref (or the label or index by which it was previously loaded), returns a reference to an array of indices for that array where the values are either undefined, empty or non-numerical.
657            
658             =cut
659            
660             sub idx_anumeric
661             { # List keyed by sample-names of their indices where invalid values lie
662 0     0 1 0 my ( $self, %args ) = @_;
663             my $data =
664 0 0       0 $args{'data'} ? delete $args{'data'} : $self->get_hoa_by_lab(%args);
665 0         0 my @purge = ();
666 0         0 for my $i ( 0 .. scalar @{$data} - 1 ) {
  0         0  
667 0 0       0 if ( _nan( $data->[$i] ) ) {
668 0         0 push @purge, $i;
669             }
670             }
671 0         0 return \@purge;
672             }
673            
674             =head2 Dumping data
675            
676             =head3 dump_vals
677            
678             $seq->dump_vals(delim => ", "); # assumes the first (only?) loaded array should be dumped
679             $seq->dump_vals(index => INT, delim => ", "); # dump the i'th loaded array
680             $seq->dump_vals(label => STRING, delim => ", "); # dump the array loaded/added with the given "label"
681            
682             Prints to STDOUT a space-separated line (ending with "\n") of a loaded/added data's elements. Optionally, give a value for B to specify how the elements in each array should be separated; default is a single space.
683            
684             =cut
685            
686             sub dump_vals {
687 0     0 1 0 my ( $self, @args ) = @_;
688 0 0       0 my $args = ref $args[0] ? $args[0] : {@args};
689 0   0     0 my $delim = $args->{'delim'} || q{ };
690 0 0       0 print {*STDOUT} join( $delim, @{ $self->access($args) } ), "\n"
  0         0  
  0         0  
691             or croak 'Could not print line to STDOUT';
692 0         0 return 1;
693             }
694            
695             =head3 dump_list
696            
697             Dumps a list (using L) of the data currently loaded, without showing their actual elements. List is firstly by index, then by label (if any), then gives the number of elements in the associated array.
698            
699             =cut
700            
701             sub dump_list {
702 0     0 1 0 my $self = shift;
703 0         0 my ( $lim, $lab, $N, $len_lab, $len_n, $tbl, @rows, @maxlens ) = ();
704 0         0 $lim = $self->ndata();
705 0 0       0 @maxlens = ( ( $lim > 5 ? $lim : 5 ), 5, 1 );
706 0         0 for my $i ( 0 .. $lim - 1 ) {
707             $lab =
708             defined $self->{_DATA}->[$i]->{lab}
709             ? $self->{_DATA}->[$i]->{lab}
710 0 0       0 : q{-};
711 0         0 $N = scalar @{ $self->{_DATA}->[$i]->{seq} };
  0         0  
712 0         0 $len_lab = length $lab;
713 0         0 $len_n = length $N;
714 0 0       0 if ( $len_lab > $maxlens[1] ) {
715 0         0 $maxlens[1] = $len_lab;
716             }
717 0 0       0 if ( $len_n > $maxlens[2] ) {
718 0         0 $maxlens[2] = $len_n;
719             }
720 0         0 $rows[$i] = [ $i, $lab, $N ];
721             }
722 0         0 require Text::SimpleTable;
723 0         0 $tbl = Text::SimpleTable->new(
724             [ $maxlens[0], 'index' ],
725             [ $maxlens[1], 'label' ],
726             [ $maxlens[2], 'N' ]
727             );
728 0         0 $tbl->row( @{$_} ) for @rows;
  0         0  
729 0 0       0 print {*STDOUT} $tbl->draw or croak 'Could not print list of loaded data';
  0         0  
730 0         0 return 1;
731             }
732            
733             # PRIVATE METHODS:
734            
735             sub _cull_hoa_indep {
736 1     1   2 my $hoa = shift;
737 1         1 my $purged_n = shift;
738 1         2 my ( $purged, %purged_data ) = 0;
739 1         1 for my $name ( keys %{$hoa} ) {
  1         3  
740 2         3 my @clean = ();
741 2         2 for my $i ( 0 .. scalar( @{ $hoa->{$name} } ) - 1 ) {
  2         4  
742 7 100       10 if ( _nan( $hoa->{$name}->[$i] ) ) {
743 3         3 $purged++;
744             }
745             else {
746 4         5 push @clean, $hoa->{$name}->[$i];
747             }
748             }
749             croak
750 2 50       5 "Empty data for ANOVA following purge of invalid value(s) in list < $name >"
751             if !scalar @clean;
752 2         13 $purged_data{$name} = [@clean];
753             }
754 1         2 ${$purged_n} = $purged;
  1         2  
755 1         2 return \%purged_data;
756             }
757            
758             sub _cull_hoa_across {
759 2     2   2 my $hoa = shift;
760 2         3 my $purged_n = shift;
761 2         3 my ( $purged, %invalid_i_by_name, %invalid_idx, %clean, %purged_data ) = ();
762            
763 2         3 for my $name ( keys %{$hoa} ) {
  2         4  
764 5         5 for my $i ( 0 .. scalar( @{ $hoa->{$name} } ) - 1 ) {
  5         6  
765 23 100       22 if ( _nan( $hoa->{$name}->[$i] ) ) {
766 7         13 $invalid_i_by_name{$name}->{$i} = 1;
767             }
768             }
769             }
770            
771             # List all indices in all lists with invalid values;
772             # and copy each group of data for cleaning:
773 2         1 for my $name ( keys %{$hoa} ) {
  2         5  
774 5         6 $clean{$name} = $hoa->{$name};
775 5         3 while ( my ( $idx, $val ) = each %{ $invalid_i_by_name{$name} } ) {
  12         41  
776 7         9 $invalid_idx{$idx} += $val;
777             }
778             }
779 2   50     6 $purged = ( scalar keys(%invalid_idx) ) || 0;
780            
781             # Purge by index (from highest to lowest):
782 2         9 for my $idx ( reverse sort { $a <=> $b } keys %invalid_idx ) {
  6         12  
783 7         9 for my $name ( keys %clean ) {
784 18 100       11 if ( $idx < scalar @{ $clean{$name} } ) {
  18         24  
785 15         9 splice @{ $clean{$name} }, $idx, 1;
  15         18  
786             }
787             }
788             }
789            
790 2         4 for my $c ( keys %clean ) {
791 5         6 $purged_data{$c} = $clean{$c};
792             }
793 2         3 ${$purged_n} = $purged;
  2         1  
794 2         10 return \%purged_data;
795             }
796            
797             sub _init_data {
798 34     34   43 my ( $self, @args ) = @_;
799            
800 34         43 my $tmp = {};
801 34 100       66 if ( _isa_hashref_of_arefs( $args[0] ) ) { # cases 4 & 6
    100          
    100          
802 5         11 $tmp = _init_labelled_data( $self, $args[0] );
803             }
804             elsif ( _isa_hash_of_arefs(@args) ) { # cases 3 & 5
805 19         62 $tmp = _init_labelled_data( $self, {@args} );
806             }
807             elsif ( _isa_array_of_arefs(@args) ) { # cases 2 & 7
808 8         15 $tmp = _init_unlabelled_data(@args);
809             }
810             else { # assume @args is just a list of strings - case 1
811 2 50       5 if ( ref $args[0] ) {
812 0         0 croak
813             'Don\'t know how to load/add the given data: Need to be in the structure of HOA (referenced or not), or an unreferenced AOA';
814             }
815             else {
816 2         10 $tmp->{0} = { seq => [@args], lab => undef };
817             }
818             }
819            
820             #carp 'Empty array of data is being loaded/added' if any { ! scalar @{$tmp->{$_}->{'seq'}} } keys %{$tmp};
821 34         93 return $tmp;
822             }
823            
824             sub _isa_hashref_of_arefs {
825 34     34   40 my $arg = shift;
826 34 100 100     145 if ( not ref $arg or ref $arg ne 'HASH' ) {
827 29         91 return 0;
828             }
829             else {
830 5         7 return _isa_hash_of_arefs( %{$arg} );
  5         16  
831             }
832             }
833            
834             sub _isa_hash_of_arefs {
835            
836             # determines that:
837             # - scalar @args passes Number::Misc is_even, then that:
838             # - every odd indexed value 'hascontent' via String::Util
839             # - every even indexed value is aref
840 34     34   52 my @args = @_;
841 34         34 my $bool = 0;
842 34 100       104 if ( is_even( scalar @args ) )
843             { # Number::Misc method - not odd number in assignment
844 26         636 my %args = @args; # so assume is hash
845             HASH_CHECK:
846 26         88 while ( my ( $lab, $val ) = each %args ) {
847 36 100 66     94 if ( hascontent($lab) && ref $val eq 'ARRAY' ) {
848 34         322 $bool = 1;
849             }
850             else {
851 2         30 $bool = 0;
852             }
853 36 100       208 last HASH_CHECK if $bool == 0;
854             }
855             }
856             else {
857 8         172 $bool = 0;
858             }
859 34         98 return $bool;
860             }
861            
862             sub _isa_array_of_arefs {
863 10     10   17 my @args = @_;
864 10 100   10   70 if ( all { ref $_ eq 'ARRAY' } @args ) {
  10         35  
865 8         18 return 1;
866             }
867             else {
868 2         4 return 0;
869             }
870             }
871            
872             sub _init_labelled_data {
873 24     24   29 my ( $self, $href ) = @_;
874 24         20 my ( $i, %tmp ) = ( scalar @{ $self->{_DATA} } );
  24         58  
875 24         26 while ( my ( $lab, $seq ) = each %{$href} ) {
  58         137  
876 34         63 my $j = _seq_index_by_label( $self, $lab );
877 34 100       49 if ( defined $j )
878             { # already a label for these data, so don't need to define it for this init
879 7         8 $tmp{$j} = { seq => [ @{$seq} ], lab => undef };
  7         36  
880             }
881             else { # no aref labelled $lab yet: define for seq and label
882 27         26 $tmp{ $i++ } = { seq => [ @{$seq} ], lab => $lab };
  27         120  
883             }
884             }
885 24         45 return \%tmp;
886             }
887            
888             sub _init_unlabelled_data {
889 8     8   11 my @args = @_;
890 8         15 my %tmp = ();
891 8         24 for my $i ( 0 .. scalar @args - 1 ) {
892 8         10 $tmp{$i} = { seq => [ @{ $args[$i] } ], lab => undef };
  8         51  
893             }
894 8         16 return \%tmp;
895             }
896            
897             sub _index_by_args {
898 29     29   39 my ( $self, @args ) = @_;
899 29         25 my $i;
900 29 100       50 if ( !$args[0] ) {
901 10         9 $i = 0;
902             }
903             else {
904 19 50       53 my $args = ref $args[0] ? $args[0] : {@args};
905 19 50       66 if ( hascontent( $args->{'index'} ) ) { # assume is_int
    50          
906 0         0 $i = $args->{'index'};
907             }
908             elsif ( hascontent( $args->{'label'} ) ) {
909 19         225 $i = _seq_index_by_label( $self, $args->{'label'} );
910             }
911             else {
912 0         0 $i = 0;
913             }
914             }
915 29         65 return $i;
916             }
917            
918             sub _seq_index_by_label {
919 68     68   67 my ( $self, $label ) = @_;
920 68         61 my ( $i, $k ) = ( 0, 0 );
921 68         57 for ( ; $i < scalar( @{ $self->{_DATA} } ) ; $i++ ) {
  85         156  
922 55 100 66     238 if ( $self->{_DATA}->[$i]->{lab}
923             and $self->{_DATA}->[$i]->{lab} eq $label )
924             {
925 38         34 $k++;
926 38         44 last;
927             }
928             }
929 68 100       135 return $k ? $i : undef;
930             }
931            
932             sub _add_from_object_aref {
933 1     1   1 my ( $self, $aref ) = @_;
934 1         2 for my $dat ( @{$aref} ) {
  1         2  
935 2 50       4 if ( hascontent( $dat->{'lab'} ) ) {
936 2         14 $self->add( $dat->{'lab'} => $dat->{'seq'} );
937             }
938             else {
939 0         0 $self->add( $dat->{'seq'} );
940             }
941             }
942 1         1 return 1;
943             }
944            
945             sub _nan {
946 88 100   88   187 return !looks_like_number(shift) ? 1 : 0;
947             }
948            
949             ## Deprecated/obsolete methods:
950             sub load_from_file {
951 0     0 0   croak __PACKAGE__
952             . ': load_from_file() method is obsolete from v.10; read-in and save data by your own methods';
953             }
954            
955             sub save_to_file {
956 0     0 0   croak __PACKAGE__
957             . ': load_from_file() method is obsolete from v.10; read-in and save data by your own methods';
958             }
959            
960             =head1 EXAMPLES
961            
962             B<1. Multivariate data>
963            
964             In a study of how doing mental arithmetic affects arousal in self and others, three male frogs were maths-trained and then, as they did their calculations, were measured for pupillary dilation and perceived attractiveness. After four runs, average measures per frog can be loaded:
965            
966             $frogs->load(Names => [qw/Freddo Kermit Larry/], Pupil => [59.2, 77.7, 56.1], Attract => [3.11, 8.79, 6.99]);
967            
968             But one more frog still had to graduate from training, and data are now ready for loading:
969            
970             $frogs->add(Names => ['Sleepy'], Pupil => [83.4], Attract => [5.30]);
971             $frogs->dump_data(label => 'Pupil'); # prints "59.2 77.7 56.1 83.4" : all 4 frogs' pupil data for analysis by some module
972            
973             Another frog has been trained, measures taken:
974            
975             $frogs->add(Pupil => [93], Attract => [6.47], Names => ['Jack']); # add yet another frog's data
976             $frogs->dump_data(label => 'Pupil'); # prints "59.2 77.7 56.1 83.4 93": all 5 frogs' pupil data
977            
978             Now we run another experiment, taking measures of heart-rate, and can add them to the current load of data for analysis:
979            
980             $frogs->add(Heartrate => [.70, .50, .44, .67, .66]); # add entire new array for all frogs
981             print "heartrate data are bung" if ! $frogs->all_proportions(label => 'Heartrate'); # validity check (could do before add)
982             $frogs->dump_list(); # see all four data-arrays now loaded, each with 5 observations (1 per frog), i.e.:
983             .-------+-----------+----.
984             | index | label | N |
985             +-------+-----------+----+
986             | 0 | Names | 5 |
987             | 1 | Attract | 5 |
988             | 2 | Pupil | 5 |
989             | 3 | Heartrate | 5 |
990             '-------+-----------+----'
991            
992             B<2. Using as a base module>
993            
994             As L, and so its sub-modules, use this module as their base, it doesn't have to do much data-managing itself:
995            
996             use Statistics::Sequences;
997             my $seq = Statistics::Sequences->new();
998             $seq->load(qw/f b f b b/); # using Statistics::Data method
999             say $seq->p_value(stat => 'runs', exact => 1); # using Statistics::Sequences::Runs method
1000            
1001             Or if these data were loaded directly within Statistics::Data, the data can be shared around modules that use it as a base:
1002            
1003             use Statistics::Data;
1004             use Statistics::Sequences::Runs;
1005             my $dat = Statistics::Data->new();
1006             my $runs = Statistics::Sequences::Runs->new();
1007             $dat->load(qw/f b f b b/);
1008             $runs->pass($dat);
1009             say $runs->p_value(exact => 1);
1010            
1011             =head1 DIAGNOSTICS
1012            
1013             =over 4
1014            
1015             =item Don't know how to load/add the given data
1016            
1017             Croaked when attempting to load or add data with an unsupported data structure where the first argument is a reference. See the examples under L for valid (and invalid) ways of sending data to them.
1018            
1019             =item Data for accessing need to be loaded
1020            
1021             Croaked when calling L, or any methods that use it internally -- viz., L and the validity checks L -- when it is called with a label for data that have not been loaded, or did not load successfully.
1022            
1023             =item Data for unloading need to be loaded
1024            
1025             Croaked when calling L with an index or a label attribute and the data these refer to have not been loaded, or did not load successfully.
1026            
1027             =back
1028            
1029             =head1 DEPENDENCIES
1030            
1031             L - used for its C method when testing loads
1032            
1033             L - used for its C method when testing loads
1034            
1035             L - used for its C and C methods
1036            
1037             L - required for L
1038            
1039             L - required for L
1040            
1041             =head1 BUGS AND LIMITATIONS
1042            
1043             Some methods rely on accessing previously loaded data but should permit performing their operations on data submitted directly to them, just like, e.g., $dat->all_numeric(\@data) is ok. This is handled for now internally, but should be handled in the same way by modules using this one as its base - for at the moment they have to check for an aref to their data-manipulating methods ahead of accessing any loaded data by this module.
1044            
1045             Please report any bugs or feature requests to C, or through the web interface at L. This will notify the author, and then you'll automatically be notified of progress on your bug as any changes are made.
1046            
1047             =head1 SUPPORT
1048            
1049             You can find documentation for this module with the perldoc command.
1050            
1051             perldoc Statistics::Data
1052            
1053             You can also look for information at:
1054            
1055             =over 4
1056            
1057             =item * RT: CPAN's request tracker
1058            
1059             L
1060            
1061             =item * AnnoCPAN: Annotated CPAN documentation
1062            
1063             L
1064            
1065             =item * CPAN Ratings
1066            
1067             L
1068            
1069             =item * Search CPAN
1070            
1071             L
1072            
1073             =back
1074            
1075             =head1 AUTHOR
1076            
1077             Roderick Garton, C<< >>
1078            
1079             =head1 LICENSE AND COPYRIGHT
1080            
1081             Copyright 2009-2017 Roderick Garton
1082            
1083             This program is free software; you can redistribute it and/or modify it under the terms of either: the GNU General Public License as published
1084             by the Free Software Foundation; or the Artistic License. See L for more information.
1085            
1086             =cut
1087            
1088             1; # End of Statistics::Data