File Coverage

blib/lib/CXC/PDL/Bin1D.pm
Criterion Covered Total %
statement 146 159 91.8
branch 22 44 50.0
condition 37 53 69.8
subroutine 27 27 100.0
pod 2 2 100.0
total 234 285 82.1


line stmt bran cond sub pod time code
1             #
2             # GENERATED WITH PDL::PP from lib/CXC/PDL/Bin1D.pd! Don't modify!
3             #
4             package CXC::PDL::Bin1D;
5              
6             our @EXPORT_OK = qw( );
7             our %EXPORT_TAGS = (Func=>\@EXPORT_OK);
8              
9 4     4   1909132 use PDL::Core();
  4         9  
  4         139  
10 4     4   28 use PDL::Exporter;
  4         16  
  4         54  
11 4     4   154 use DynaLoader;
  4         7  
  4         3697  
12              
13              
14             our $VERSION = '0.28';
15             our @ISA = ( 'PDL::Exporter','DynaLoader' );
16             push @PDL::Core::PP, __PACKAGE__;
17             bootstrap CXC::PDL::Bin1D $VERSION;
18              
19              
20              
21              
22              
23              
24              
25              
26             #line 203 "lib/CXC/PDL/Bin1D.pd"
27              
28             use v5.10;
29             use strict;
30             use warnings;
31             no namespace::clean;
32             use CXC::PDL::Bin1D::Utils;
33             use namespace::clean;
34              
35             #line 222 "lib/CXC/PDL/Bin1D.pd"
36             use constant {
37             BIN_RC_OK => 1,
38             BIN_RC_GEWMAX => 2,
39             BIN_RC_GENMAX => 4,
40             BIN_RC_FOLDED => 8,
41             BIN_RC_GTMINSN => 16,
42             BIN_ARG_HAVE_ERROR => 1,
43             BIN_ARG_HAVE_WEIGHT => 2,
44             BIN_ARG_SET_BAD => 4,
45             BIN_ARG_FOLD => 8,
46             BIN_ARG_HAVE_WIDTH => 16,
47             BIN_ARG_ERROR_SDEV => 32,
48             BIN_ARG_ERROR_POISSON => 64,
49             BIN_ARG_ERROR_RSS => 128,
50             BIN_ARG_HAVE_SIGNAL => 256,
51             BIN_ARG_SAVE_OOB_START_END => 512,
52             BIN_ARG_SAVE_OOB_START_NBINS => 1024,
53             BIN_ARG_SHIFT_IMIN => 1536,
54             BIN_ARG_SAVE_OOB_END => 2048,
55             BIN_ARG_SAVE_OOB_NBINS => 4096,
56             BIN_ARG_SAVE_OOB => 7680,
57             BIN_ARG_WANT_EXTREMA => 8192,
58             BIN_ARG_WANT_SUM_WEIGHT => 16384,
59             BIN_ARG_WANT_SUM_WEIGHT2 => 32768,
60             };
61              
62             #line 227 "lib/CXC/PDL/Bin1D.pd"
63              
64             =for Pod::Coverage
65             BIN_RC_OK
66             BIN_RC_GEWMAX
67             BIN_RC_GENMAX
68             BIN_RC_FOLDED
69             BIN_RC_GTMINSN
70             BIN_ARG_HAVE_ERROR
71             BIN_ARG_HAVE_WEIGHT
72             BIN_ARG_SET_BAD
73             BIN_ARG_FOLD
74             BIN_ARG_HAVE_WIDTH
75             BIN_ARG_ERROR_SDEV
76             BIN_ARG_ERROR_POISSON
77             BIN_ARG_ERROR_RSS
78             BIN_ARG_HAVE_SIGNAL
79             BIN_ARG_SAVE_OOB_START_END
80             BIN_ARG_SAVE_OOB_START_NBINS
81             BIN_ARG_SHIFT_IMIN
82             BIN_ARG_SAVE_OOB_END
83             BIN_ARG_SAVE_OOB_NBINS
84             BIN_ARG_SAVE_OOB
85             BIN_ARG_WANT_EXTREMA
86             BIN_ARG_WANT_SUM_WEIGHT
87             BIN_ARG_WANT_SUM_WEIGHT2
88              
89             =cut
90              
91             #line 230 "lib/CXC/PDL/Bin1D.pd"
92             my %MapErrorAlgo = (
93              
94             sdev => BIN_ARG_ERROR_SDEV,
95             rss => BIN_ARG_ERROR_RSS,
96             poisson => BIN_ARG_ERROR_POISSON,
97             );
98              
99             #line 239 "lib/CXC/PDL/Bin1D.pd"
100              
101             =for Pod::Coverage
102             set_boundscheck
103             set_debugging
104              
105             =for stopwords
106             merchantability
107              
108             =cut
109              
110             =head1 NAME
111              
112             CXC::PDL::Bin1D - one dimensional binning functions
113              
114             =head1 SYNOPSIS
115              
116             use PDL;
117             use CXC::PDL::Bin1D;
118              
119             =head1 DESCRIPTION
120              
121             One dimensional binning functions,
122              
123             =over
124              
125             =item *
126              
127             binning up to a given S/N
128              
129             =item *
130              
131             optimized one-pass robust statistics
132              
133             =back
134              
135             All functions are made available in the B namespace.
136              
137             =head1 SUBROUTINES
138              
139             =cut
140              
141             #line 441 "lib/CXC/PDL/Bin1D.pd"
142             $EXPORT_TAGS{constants} = [ qw(
143             BIN_RC_FOLDED
144             BIN_RC_GTMINSN
145             BIN_RC_OK
146             BIN_RC_GENMAX
147             BIN_RC_GEWMAX
148             BIN_ARG_SAVE_OOB_END
149             BIN_ARG_SHIFT_IMIN
150             BIN_ARG_SAVE_OOB
151             BIN_ARG_WANT_EXTREMA
152             BIN_ARG_SAVE_OOB_START_NBINS
153             BIN_ARG_ERROR_RSS
154             BIN_ARG_HAVE_SIGNAL
155             BIN_ARG_ERROR_SDEV
156             BIN_ARG_FOLD
157             BIN_ARG_WANT_SUM_WEIGHT2
158             BIN_ARG_HAVE_WIDTH
159             BIN_ARG_HAVE_WEIGHT
160             BIN_ARG_HAVE_ERROR
161             BIN_ARG_SAVE_OOB_START_END
162             BIN_ARG_SET_BAD
163             BIN_ARG_ERROR_POISSON
164             BIN_ARG_WANT_SUM_WEIGHT
165             BIN_ARG_SAVE_OOB_NBINS
166             )];
167              
168             #line 443 "lib/CXC/PDL/Bin1D.pd"
169             $EXPORT_TAGS{Func} = [ qw(
170             bin_adaptive_snr
171             bin_on_index
172             )];
173              
174             #line 444 "lib/CXC/PDL/Bin1D.pd"
175             @EXPORT_OK = map { @{$_} } values %EXPORT_TAGS;
176             #line 177 "Bin1D.pm"
177              
178 4     4   37 no namespace::clean;
  4         6  
  4         21  
179              
180 4     4   3698 use Types::Common::Numeric qw[ PositiveNum PositiveInt PositiveOrZeroInt ];
  4         685366  
  4         41  
181 4     4   5309 use Types::Standard qw[ Optional InstanceOf slurpy Dict Bool Enum ];
  4         8  
  4         27  
182 4     4   25055 use Type::Params qw[ compile ];
  4         24566  
  4         47  
183              
184 4     4   1877 use Carp ();
  4         9  
  4         105  
185 4     4   20 use PDL::Lite ();
  4         8  
  4         83  
186              
187 4     4   18 use namespace::clean;
  4         7  
  4         36  
188              
189 4     4   2687 use constant PP_PASS_TMPS => do { require version; PDL->VERSION < version->parse( '2.073' ) };
  4         8  
  4         8  
  4         1362  
  4         7808  
190              
191             my $bin_adaptive_snr_check;
192              
193             BEGIN {
194              
195 4     4   37 $bin_adaptive_snr_check = compile(
196             slurpy Dict [
197             signal => InstanceOf ['PDL'],
198             error => Optional [ InstanceOf ['PDL'] ],
199             width => Optional [ InstanceOf ['PDL'] ],
200             min_snr => PositiveNum,
201             min_nelem => Optional [PositiveInt],
202             max_nelem => Optional [PositiveInt],
203             min_width => Optional [PositiveNum],
204             max_width => Optional [PositiveNum],
205             fold => Optional [Bool],
206             error_algo => Optional [ Enum [ keys %MapErrorAlgo ] ],
207             set_bad => Optional [Bool],
208             ] );
209             }
210              
211             ## no critic (Subroutines::ProhibitExcessComplexity)
212             sub bin_adaptive_snr {
213              
214 35     35 1 4364930 my ( $opts ) = $bin_adaptive_snr_check->( @_ );
215              
216             # specify defaults
217 35         4562 my %opt = (
218             error_algo => 'sdev',
219             min_nelem => 1,
220             %$opts,
221             );
222              
223             Carp::croak( "width must be specified if either of min_width or max_width is specified\n" )
224             if ( defined $opt{min_width} || defined $opt{max_width} )
225 35 50 100     458 && !defined $opt{width};
      66        
226              
227             Carp::croak( "must specify error attribute if 'rss' errors selected\n" )
228 35 50 66     175 if !defined $opt{error} && $opt{error_algo} eq 'rss';
229              
230 35   100     172 $opt{min_width} ||= 0;
231 35   100     142 $opt{max_width} ||= 0;
232 35   100     156 $opt{max_nelem} ||= 0;
233              
234             # if the user hasn't specified whether to fold the last bin,
235             # turn it on if there aren't *maximum* constraints
236             $opt{fold} = !defined $opt{max_width} || !defined $opt{max_nelem}
237 35 100 33     212 unless defined $opt{fold};
238              
239             $opt{flags}
240             = ( ( defined $opt{error} && BIN_ARG_HAVE_ERROR ) || 0 )
241             | ( ( defined $opt{width} && BIN_ARG_HAVE_WIDTH ) || 0 )
242             | ( ( $opt{fold} && BIN_ARG_FOLD ) || 0 ) | ( ( $opt{set_bad} && BIN_ARG_SET_BAD ) || 0 )
243 35   100     618 | $MapErrorAlgo{ $opt{error_algo} };
      100        
      100        
      50        
244              
245 35         145 my @pin = qw[ signal error width ];
246 35         274 my @pout = qw[ index nbins nelem b_signal b_error b_mean b_snr
247             b_width ifirst ilast rc ];
248 35         145 my @oargs = qw[ flags min_snr min_nelem max_nelem min_width max_width ];
249              
250             # several of the input piddles are optional. the PP routine
251             # doesn't know that and will complain about the wrong number of
252             # dimensions if we pass a null piddle. A 1D zero element piddle
253             # will have its dimensions auto-expanded without much
254             # wasted memory.
255 35         73 $opt{$_} = PDL->new( 0 ) for grep { !defined $opt{$_} } @pin;
  105         368  
256 35         1541 $opt{$_} = PDL->null for grep { !defined $opt{$_} } @pout;
  385         727  
257              
258 35         3226 my @pars;
259 35         58 if ( PP_PASS_TMPS ) {
260             my @ptmp = qw[ berror2 bsignal2 b_m2 b_weight b_weight_sig b_weight_sig2 ];
261             $opt{$_} = PDL->null for grep { !defined $opt{$_} } @ptmp;
262             @pars = ( @pin, @pout, @ptmp, @oargs );
263             }
264             else {
265 35         231 @pars = ( @pin, @pout, @oargs );
266             }
267              
268 35         5267 _bin_adaptive_snr_int( @opt{@pars} );
269              
270             my %results = map { ## no critic (BuiltinFunctions::ProhibitComplexMappings)
271 35         129 ( my $nkey = $_ ) =~ s/^b_//;
  385         758  
272 385         1021 ( $nkey, $opt{$_} )
273             } @pout;
274              
275 35 50       667 return wantarray ? %results : \%results;
276             }
277              
278             =pod
279              
280             =head2 bin_adaptive_snr
281              
282             =for usage
283              
284             %hash = bin_adaptive_snr( %options );
285              
286             =for stopwords
287             Adaptively
288              
289             =for ref
290              
291             Adaptively bin a data set to achieve a minimum signal to noise ratio
292             in each bin.
293              
294             This routine ignores data with bad values or with errors that have
295             bad values.
296              
297             B groups data into bins such that each bin meets
298             one or more conditions:
299              
300             =over
301              
302             =item *
303              
304             a minimum signal to noise ratio (S/N).
305              
306             =item *
307              
308             A minimum number of data elements (optional).
309              
310             =item *
311              
312             A maximum number of data elements (optional).
313              
314             =item *
315              
316             A maximum data I (see below) (optional).
317              
318             =item *
319              
320             A minimum data I (see below) (optional).
321              
322             =back
323              
324             The data are typically dependent values (e.g. flux as a function of
325             energy or counts as a function of radius). The data should be sorted
326             by the independent variable (e.g. energy or radius).
327              
328             Calculation of the S/N requires an estimate of the error associated
329             with each datum. The error may be provided or may be estimated from
330             the population using either the number of data elements in a bin
331             (e.g. Poisson errors) or the standard deviation of the signal in a bin.
332             If errors are provided, they may be used to weight the population standard
333             deviation or may be added in quadrature.
334              
335             Binning begins at the start of the signal vector. Data are accumulated
336             into a bin until one or more of the possible criteria is met. If the
337             final bin does not meet the required criteria, it may optionally be
338             successively folded into preceding bins until the final bin passes the
339             criteria or there are no bins left.
340              
341             Each datum may be assigned an extra parameter, its I,
342             which is summed for each bin, and can be used as an additional constraint
343             on bin membership.
344              
345             =head3 Parameters
346              
347             B is passed a hash or a reference to a hash containing
348             its parameters. The available parameters are:
349              
350             =over
351              
352             =item C
353              
354             A piddle containing the signal data. This is required.
355              
356             =item C
357              
358             A piddle with the error for signal datum. Optional.
359              
360             =item C
361              
362             A piddle with the I of each element of the signal. Optional.
363              
364             =item C
365              
366             A string indicating how the error is to be handled or calculated. It
367             may be have one of the following values:
368              
369             =over
370              
371             =item * C
372              
373             Poisson errors will be calculated based upon the number of elements in a bin,
374              
375             error**2 = N
376              
377             Any input errors are ignored.
378              
379             =item * C
380              
381             The error is the population standard deviation of the signal in a bin.
382              
383             error**2 = Sum [ ( signal - mean ) **2 ] / ( N - 1 )
384              
385             If errors are provided, they are used to calculated the weighted population
386             standard deviation.
387              
388             error**2 = ( Sum [ (signal/error)**2 ] / Sum [ 1/error**2 ] - mean**2 )
389             * N / ( N - 1 )
390              
391             =item * C
392              
393             Errors must be provided; the errors of elements in a bin are added in
394             quadrature.
395              
396             =back
397              
398             =item C
399              
400             The minimum signal to noise ratio to be achieved in each bin. Required.
401              
402             =item C
403              
404             =item C
405              
406             The minimum and/or maximum number of elements to be achieved in each bin. Optional
407              
408             =item C
409              
410             =item C
411              
412             The minimum and/or maximum width of the elements to be achieved in each bin. Optional.
413              
414             =item C I
415              
416             If true, the last bin may be folded into the preceding bin in order to
417             ensure that the last bin meets one or more of the criteria. It defaults to false.
418              
419             =back
420              
421             =head3 Results
422              
423             B returns a hashref with the following entries:
424              
425             =over
426              
427             =item C
428              
429             A piddle containing the bin indices for the elements in the input
430             data piddle. Data which were skipped because of bad values will have
431             their index set to the bad value.
432              
433             =item C
434              
435             A piddle containing the number of bins which spanned the range of the
436             input data.
437              
438             =item C
439              
440             A piddle containing the sum of the data values in each bin. Only
441             indices C<0> through C are valid.
442              
443             =item C
444              
445             A piddle containing the number of data elements in each bin. Only
446             indices C<0> through C are valid.
447              
448             =item C
449              
450             A piddle containing the errors in each bin, calculated using the
451             algorithm specified via C. Only indices C<0> through
452             C are valid.
453              
454             =item C
455              
456             A piddle containing the weighted mean of the signal in each bin. Only
457             indices C<0> through C are valid.
458              
459             =item C
460              
461             A piddle containing the index into the input data piddle of the first
462             data value in a bin. Only indices C<0> through C are valid.
463              
464             =item C
465              
466             A piddle containing the index into the input data piddle of the last
467             data value in a bin. Only indices C<0> through C are valid.
468              
469             =item C
470              
471             A piddle containing a results code for each output bin. Only indices
472             C<0> through C are valid. The code is the bitwise "or" of
473             the following constants (available in the C
474             namespace)
475              
476             =over
477              
478             =item BIN_RC_OK
479              
480             The bin met the minimum S/N, data element count and weight requirements
481              
482             =item BIN_RC_GEWMAX
483              
484             The bin weight was greater or equal to that requested.
485              
486             =item BIN_RC_GENMAX
487              
488             The number of data elements was greater or equal to that requested.
489              
490             =item BIN_RC_FOLDED
491              
492             The bin is the result of folding bins at the end of the bin vector to
493             achieve a minimum S/N.
494              
495             =item BIN_RC_GTMINSN
496              
497             The bin accumulated more data elements than was necessary to meet the
498             S/N requirements. This results from constraints on the minimum number
499             of data elements or bin weight.
500              
501             =back
502              
503             =back
504              
505             =cut
506              
507              
508              
509              
510              
511              
512              
513              
514 4     4   725274 use v5.10;
  4         18  
515 4     4   26 no namespace::clean;
  4         6  
  4         39  
516              
517 4     4   1870 use Types::Common::Numeric qw[ PositiveOrZeroInt ];
  4         10  
  4         45  
518 4     4   4324 use Types::Standard qw[ Optional Bool StrMatch Undef Int ];
  4         20  
  4         83  
519 4     4   17450 use Types::PDL -types;
  4         330386  
  4         79  
520 4     4   11568 use Type::Params qw[ compile_named ];
  4         8  
  4         30  
521              
522 4     4   1699 use Carp ();
  4         8  
  4         72  
523 4     4   21 use PDL::Lite ();
  4         7  
  4         193  
524              
525 4     4   2242 use Hash::Wrap ( { -class => __PACKAGE__ . '::bin_on_index' } );
  4         34520  
  4         44  
526              
527 4     4   6135 use constant PP_PASS_TMPS => do { require version; PDL->VERSION < version->parse( '2.073' ) };
  4         7  
  4         7  
  4         17  
  4         947  
528              
529             # this is ugly
530             sub CoercedPiddle {
531              
532             my $to_type = shift;
533              
534             my $convert = sprintf(
535             <<'EOS',
536             do { local $@;
537             require PDL::Core;
538             my $new = eval { PDL::Core::convert( $_, PDL::%s ) };
539             length($@) ? $_ : $new
540             }
541             EOS
542             $to_type,
543             );
544             return Piddle( [ type => $to_type ] )->plus_coercions( map { $_ => $convert } Piddle, @_ );
545             }
546              
547 4     4   30 use namespace::clean;
  4         8  
  4         35  
548              
549             {
550              
551             my $check;
552             my $range_RE;
553              
554             BEGIN {
555 4     4   4403 $range_RE = qr/(flat|slice)(?:,(minmax|min|max))?/;
556              
557 4         14 $check = compile_named(
558             index => CoercedPiddle( 'indx' ),
559             data => Piddle,
560             nbins => Optional [ CoercedPiddle( indx => PositiveOrZeroInt ) ],
561             weight => Optional [ Piddle | Undef ],
562             # Enum is preferred for the following, but see https://rt.cpan.org/Ticket/Display.html?id=129729
563             oob => Optional [ StrMatch( [qr/^start-end|start-nbins|end|nbins$/] ) | Bool ] => { default => 0 },
564             want_sum_weight => Optional [Bool],
565             want_sum_weight2 => Optional [Bool],
566             imin => Optional [ CoercedPiddle( indx => Int ) ],
567             imax => Optional [ CoercedPiddle( indx => Int ) ],
568             range => Optional [ StrMatch( [$range_RE] ) ] );
569             }
570              
571             my %Map_OOB = (
572             'start-end' => BIN_ARG_SAVE_OOB_START_END,
573             'start-nbins' => BIN_ARG_SAVE_OOB_START_NBINS,
574             'end' => BIN_ARG_SAVE_OOB_END,
575             'nbins' => BIN_ARG_SAVE_OOB_NBINS,
576             );
577              
578             sub bin_on_index {
579              
580 50     50 1 585743304 my $opt = $check->( @_ );
581              
582 50         14394 _bin_on_index_parse_range_opts( $opt );
583              
584             $opt->{flags}
585             = ( ( defined $opt->{weight} && BIN_ARG_HAVE_WEIGHT ) || 0 )
586             | ( ( $opt->{want_sum_weight} && BIN_ARG_WANT_SUM_WEIGHT ) || 0 )
587             | ( ( $opt->{want_sum_weight2} && BIN_ARG_WANT_SUM_WEIGHT2 ) || 0 )
588 50   50     3237 | ( ( $opt->{want_extrema} && BIN_ARG_WANT_EXTREMA ) || 0 );
      50        
      50        
      50        
589              
590             $opt->{flags}
591             |= $Map_OOB{ lc( $opt->{oob} ) } # prefer fc, but requires 5.16
592 50   100     644 // ( $opt->{oob} && BIN_ARG_SAVE_OOB_START_END )
593             || 0;
594              
595 50         410 my @pin = qw[ data index weight imin nbins ];
596 50         418 my @pout = qw[ b_count b_data b_weight b_weight2 b_mean b_dmin b_dmax b_dev2 ];
597 50         182 my @oargs = qw[ flags nbins_max ];
598              
599             # several of the input piddles are optional. the PP routine
600             # doesn't know that and will complain about the wrong number of
601             # dimensions if we pass a null piddle. A 1D one-element piddle
602             # will have its dimensions auto-expanded without much
603             # wasted memory.
604              
605 50         125 $opt->{$_} = $opt->{data}->zeroes( 1 ) for grep { !defined $opt->{$_} } @pin;
  250         1171  
606              
607             # output and temp piddles will be auto-inflated, so we can use
608             # null piddles. In a perfect world PDL::PP would have a mechanism
609             # to avoid inflating the optional ones.
610             # TODO: use nullcreate here to get correct piddle type?
611              
612 50         1474648 $opt->{$_} = PDL->null for grep { !defined $opt->{$_} } @pout;
  400         1349  
613              
614 50         3892 my @pars;
615 50         94 if ( PP_PASS_TMPS ) {
616             my @ptmp = qw[ b_data_error b_weight_error b_weight2_error ];
617             $opt->{$_} = PDL->null for grep { !defined $opt->{$_} } @ptmp;
618             @pars = ( @pin, @pout, @ptmp, @oargs );
619             }
620             else {
621 50         433 @pars = ( @pin, @pout, @oargs );
622             }
623              
624 50         188 _bin_on_index_int( @{$opt}{@pars} );
  50         923  
625             my %results = map { ## no critic (BuiltinFunctions::ProhibitComplexMappings)
626 50         23565 ( my $nkey = $_ ) =~ s/^b_//;
  400         1133  
627 400         1312 ( $nkey, $opt->{$_} )
628             } @pout;
629              
630 50 50       296 if ( $opt->{flags} & BIN_ARG_HAVE_WEIGHT ) {
631             delete $results{weight}
632 0 0       0 unless $opt->{flags} & BIN_ARG_WANT_SUM_WEIGHT;
633             delete $results{weight2}
634 0 0       0 unless $opt->{flags} & BIN_ARG_WANT_SUM_WEIGHT2;
635             }
636             else {
637 50         237 delete @results{qw( weight weight2)};
638             }
639              
640 50         199 $results{imin} = $opt->{imin};
641 50         254 $results{nbins} = $opt->{nbins};
642              
643 50         2774 return wrap_hash( \%results );
644             }
645              
646             sub _bin_on_index_parse_range_opts {
647              
648 50     50   156 my $opt = shift;
649              
650 50 100       236 if ( $opt->{range} ) {
651              
652 30         459 my ( $dims, $range ) = $opt->{range} =~ $range_RE;
653 30   50     116 $range //= 'minmax';
654              
655             eval {
656              
657             # treat the index as a one big pool of numbers, or
658             # as threaded piddles.
659             my ( $imin, $imax )
660             = $dims eq 'slice'
661 80         2639 ? map { $_->dummy( 0 ) } $opt->{index}->minmaximum
662 30 100       1057 : map { PDL::indx( [$_] ) } $opt->{index}->minmax;
  20         1212  
663              
664 30 100       1245 if ( $range eq 'minmax' ) {
    100          
    50          
665              
666             die( "do not specify $_ when specifying 'minmax'\range with " )
667 6         14 for grep { defined $opt->{$_} } qw[ imin imax nbins ];
  18         62  
668              
669 6         16 $opt->{imin} = $imin;
670 6         27 $opt->{imax} = $imax;
671             }
672              
673             elsif ( $range eq 'min' ) {
674              
675             die( "do not specify imin when specifying 'min'\n" )
676 12 50       58 if defined $opt->{imin};
677              
678             die( "specify exactly one of 'imax' or 'nbins'\n" )
679 12 50 66     97 if defined $opt->{imax} && defined $opt->{nbins};
680              
681 12         33 $opt->{imin} = $imin;
682             }
683              
684             elsif ( $range eq 'max' ) {
685             die( "do not specify imax\n" )
686 12 50       40 if defined $opt->{imax};
687              
688             die( "specify exactly one of 'imin' or 'nbins'\n" )
689 12 50 66     89 if defined $opt->{imin} && defined $opt->{nbins};
690              
691 12         327 $opt->{imax} = $imax;
692             }
693 30         183 1;
694 30   33     82 } // do {
695 0         0 my $error = $@;
696 0         0 chomp $error;
697 0         0 Carp::croak( $opt->{range}, ": error: $error\n" );
698             };
699              
700             }
701             else {
702              
703 20         62 my $nrange_opts = grep { defined $opt->{$_} } qw[ imin imax nbins ];
  60         144  
704              
705 20 50       153 if ( $nrange_opts == 0 ) {
    50          
    50          
706              
707 0         0 @{$opt}{ 'imin', 'imax' }
708 0         0 = map { PDL::indx( $_ ) } $opt->{index}->minmax;
  0         0  
709             }
710              
711             elsif ( $nrange_opts == 1 ) {
712              
713 0 0       0 if ( defined $opt->{nbins} ) {
    0          
    0          
714 0         0 $opt->{imin} = PDL::indx( 0 );
715             }
716             elsif ( defined $opt->{imax} ) {
717 0         0 $opt->{imin} = PDL::indx( 0 );
718             }
719             elsif ( defined $opt->{imin} ) {
720 0         0 $opt->{imax} = PDL::indx( $opt->{index}->max );
721             }
722             }
723             elsif ( $nrange_opts == 3 ) {
724 0         0 Carp::croak( "error: too many options. specify only two of imin, imax, nbins\n" );
725             }
726             }
727              
728             # don't care about imax.
729              
730 50   66     300 $opt->{imin} //= $opt->{imax} - $opt->{nbins} + 1;
731 50   66     629 $opt->{nbins} //= $opt->{imax} - $opt->{imin} + 1;
732 50         1295 $opt->{nbins_max} = $opt->{nbins}->max;
733             }
734              
735             }
736              
737             =pod
738              
739             =for stopwords
740             ACM
741             Bevington
742             Commun
743             Golub
744             Higham
745             Kahan
746             LeVeque
747             McGraw
748             superset
749              
750             =head2 bin_on_index
751              
752             =for usage
753              
754             $hashref = bin_on_index( %pars );
755              
756             =for ref
757              
758             Calculate basic statistics on optionally weighted, binned data using a
759             provided index. The input data are assumed to be one-dimensional; extra
760             dimensions are threaded over.
761              
762             This routine ignores data with bad values or with weights that have bad values.
763              
764             =head3 Description
765              
766             When generating statistics for multiple-component data binned on a
767             common component, it's more efficient to calculate a bin index for the
768             common component and then use it to generate statistics for each component.
769              
770             For example, if a time dependent stream of events is binned into time
771             intervals, statistics of the events' properties (such as position or
772             energy) must be evaluated on data binned on the intervals.
773              
774             Some statistics (such as the summed squared deviation from the mean)
775             may be calculated in two passes over the data, but may suffer from
776             numerical inaccuracies depending upon the magnitude of the
777             intermediate values.
778              
779             C uses numerically stable algorithms to calculate
780             various statistics on binned data in a single pass. It uses an index
781             piddle to assign data to bins and calculates basic statistics on the
782             data in those bins. Data may have associated weights, and the index
783             piddle need not be sorted. It by default ignores out-of-bounds data, but
784             can be directed to operate on them.
785              
786             The statistics which are returned for each bin are:
787              
788             =over
789              
790             =item *
791              
792             The number of elements, I
793              
794             =item *
795              
796             The (weighted) sum of the data,
797             I<< Sum(x_i) >>,
798             I<< Sum(w_i * x_i) >>.
799              
800             =item *
801              
802             The (weighted) mean of the data,
803             I<< Sum(x_i) / N >>,
804             I<< Sum(w_i * x_i) / Sum(w_i) >>
805              
806             =item *
807              
808             The sum of the weights, I<< Sum(w_i) >>
809              
810             =item *
811              
812             The sum of the square of the weights, I<< Sum(w_i^2) >>
813              
814             =item *
815              
816             The sum of the (weighted) squared deviation of the data from mean,
817             I<< Sum( (x_i-u)^2 ) >>,
818             I<< Sum( w_i(x_i-u)^2 ) >>. These are I normalized!.
819              
820             =item *
821              
822             The minimum and maximum data values
823              
824             =back
825              
826             The sum of the squared deviations are I normalized, to allow the
827             user to handle it according to their needs.
828              
829             For unweighted data, the typical normalization factor is I,
830             while for weighted data, the normalization factor varies depending
831             upon whether the weights represent errors or quality weights. In the
832             former case, where I<< w_i = 1 / sigma^2 >>, the normalization factor
833             is typically I<< N / (N - 1) / Sum(w_i) >>, while for the latter the
834             normalization is typically I<< Sum(w_i) / ( Sum(w_i)^2 - Sum(w_i^2) ) >>.
835             See L and L.
836              
837             The algorithms used are chosen for their numerical stability. Sums
838             are computed using Kahan summation and the mean and squared deviations are
839             calculated with stable updating algorithms. See L, L, L.
840              
841             =head4 Threading
842              
843             The parameters L, L, L, L, L, and
844             L are threaded over. This keeps things quite flexible, as one
845             can specialize things for complex datasets or keep them simple by
846             specifying the minimum information for a given parameter to have it
847             remain constant over the threaded dimensions. (Note that C
848             must have the same shape or be a superset of the other parameters).
849              
850             Unless otherwise specified, the term I refers to those of the
851             core (non-threaded) one-dimensional slices of the input and output
852             piddles.
853              
854             For some parameters there is value in applying an algorithm to
855             either the entire dataset (including threaded dimensions) or just
856             the core one-dimensional data. For example, the L
857             option can indicate that the in-bounds bins are defined by the range
858             in each one-dimensional slice of L or in L as a whole.
859              
860             =head4 Minimum Bin Index, Number of Bins, Out-Of-Bounds Data
861              
862             By default, the minimum bin index and the number of bins are
863             determined from the values in the L piddle, treating it as if
864             it were one-dimensional. Statistics for data with the minimum index
865             are stored in the results piddles at index I<0>.
866              
867             The caller may use the options L, L, and
868             L, and L to change how the index range is mapped onto
869             the results piddles, and whether the range should be specific to each
870             one-dimensional slice of L. If none of these are specified,
871             the default is equivalent to setting L to C.
872             To most efficiently store the statistics, set L to C.
873              
874             Data with indices outside of the range C<[$imin, $imin + $nbins - 1]>
875             are treated as I data, and by default are ignored. If the
876             user wishes to accumulate statistics on these data, the L option
877             may be used to indicate where in the results piddles the statistics
878             should be stored.
879              
880             =head3 Parameters
881              
882             B is passed a hash or a reference to a hash containing
883             its parameters. The possible parameters are:
884              
885             =over
886              
887             =item C I
888              
889             The data.
890             I
891              
892             =item C I
893              
894             The bin index. It should be of type C, but will be converted to that if not.
895             It must be thread compatible with L.
896             I
897              
898             =item C I
899              
900             Data weight.
901             It must be thread compatible with L.
902             I
903              
904             =item C I | I
905              
906             The number of bins.
907             It must be thread compatible with L.
908              
909             If C is set and neither of C or C are set, C is set to C<0>.
910              
911             Use L for more control over automatic determination of the range.
912              
913             =item C I | I
914              
915             The index value associated with the first in-bounds index in the
916             result piddle.
917             It must be thread compatible with L.
918              
919             If C is set and neither of C or C are set, C is set to C<< $index->max >>.
920              
921             Use L for more control over automatic determination of the range.
922              
923             =item C I | I
924              
925             The index value associated with the last in-bounds index in the
926             result piddle. It must be thread compatible with L.
927              
928             If C is set and neither of C or C are set, C is set to C<< 0 >>.
929              
930             Use L for more control over automatic determination of the range.
931              
932             =item C I<"spec,spec,...">
933              
934             Determine the in-bounds range of indices from L. The value is
935             a string containing a list of comma separated specifications.
936              
937             The first element must be one of the following values:
938              
939             =over
940              
941             =item C
942              
943             Treat L as one-dimensional, and determine a single range which
944             covers it.
945              
946             =item C
947              
948             Determine a separate range for each one-dimensional slice of L.
949              
950             =back
951              
952             It may optionally be followed by one of the following
953              
954             =over
955              
956             =item C
957              
958             Determine the full range (i.e. both minimum and maximum) from
959             L. This is the default. Do not specify L,
960             L, or L.
961              
962             =item C
963              
964             Determine the minimum of the range from L. Specify only one of L
965             or L.
966              
967             =item C
968              
969             Determine the maximum of the range from L. Specify only one of L
970             or L.
971              
972             =back
973              
974             =item C
975              
976             An index is out-of-bounds if it is not within the range
977              
978             [ $imin, $imin + $nbins )
979              
980             By default, it will be ignored.
981              
982             This option specifies where in the results piddles the out-of-bound
983             data statistics should be stored. It may be one of:
984              
985             =over
986              
987             =item C
988              
989             The extent of the results piddle is increased by two, and out-of-bound
990             statistics are written as follows:
991              
992             $index - $imin < 0 ==> $result->at(0)
993             $index - $imin >= $nbins ==> $result->at(-1)
994              
995             =item C
996              
997             The extent of the results piddle is increased by two, and out-of-bound
998             statistics are written as follows:
999              
1000             $index - $imin < 0 ==> $result->at(0)
1001             $index - $imin >= $nbins ==> $result->at($nbins)
1002              
1003             This differs from C if C<$nbins> is different for each
1004             one-dimensional slice.
1005              
1006             =item C
1007              
1008             The extent of the results piddle is increased by two, and out-of-bound
1009             statistics are written as follows:
1010              
1011             $index - $imin < 0 ==> $result->at(-2)
1012             $index - $imin >= $nbins ==> $result->at(-1)
1013              
1014             =item C
1015              
1016             The extent of the results piddle is increased by two, and out-of-bound
1017             statistics are written as follows:
1018              
1019             $index - $imin < 0 ==> $result->at($nbins-1)
1020             $index - $imin >= $nbins ==> $result->at($nbins)
1021              
1022             This differs from C if C<$nbins> is different for each
1023             one-dimensional slice.
1024              
1025             =item I
1026              
1027             If false (the default) out-of-bounds data are ignored. If true, it is
1028             equivalent to specifying L
1029              
1030             =back
1031              
1032             =item C I I<[false]>
1033              
1034             if true, the sum of the bins' weights are calculated.
1035              
1036             =item C I I<[false]>
1037              
1038             if true, the sum of square of the bins' weights are calculated.
1039              
1040             =back
1041              
1042             =head3 Results
1043              
1044             B returns a reference which may be used either a hash
1045             or object reference.
1046              
1047             The keys (or methods) and their values are as follows:
1048              
1049             =over
1050              
1051             =item C
1052              
1053             A piddle containing the (possibly weighted) sum of the data in each bin.
1054              
1055             =item C
1056              
1057             A piddle containing the number of data elements in each bin.
1058              
1059             =item C
1060              
1061             A piddle containing the sum of the weights in each bin (only if
1062             L was provided and L specified).
1063              
1064             =item C
1065              
1066             A piddle containing the sum of the square of the weights in each bin
1067             (only if L was provided and L specified).
1068              
1069             =item C
1070              
1071             A piddle containing the (possibly weighted) mean of the data in each bin.
1072              
1073             =item C
1074              
1075             =item C
1076              
1077             Piddles containing the minimum and maximum values of the data in each bin.
1078              
1079             =item C
1080              
1081             =item C
1082              
1083             The index value associated with the first in-bounds index
1084             in the statistics piddles, and the number of in-bounds bins.
1085              
1086             =back
1087              
1088             =head3 References
1089              
1090             =over
1091              
1092             =item [Chan83]
1093              
1094             Chan, Tony F., Golub, Gene H., and LeVeque, Randall J., Algorithms for
1095             Computing the Sample Variance: Analysis and Recommendations, The
1096             American Statistician, August 1983, Vol. 37, No.3, p 242.
1097              
1098             =item [West79]
1099              
1100             D. H. D. West. 1979. Updating mean and variance estimates: an improved
1101             method. Commun. ACM 22, 9 (September 1979), 532-535.
1102              
1103             =item [Higham]
1104              
1105             Higham, N. (1996). Accuracy and stability of numerical
1106             algorithms. Philadelphia: Society for Industrial and Applied
1107             Mathematics.
1108              
1109             =item [Robinson]
1110              
1111             Robinson, E.L., Data Analysis for Scientists and Engineers, 2016,
1112             Princeton University Press. ISBN 978-0-691-16992-7.
1113              
1114             =item [Bevington]
1115              
1116             Bevington, P.R., Robinson, D.K., Data Reduction and Error Analysis for
1117             the Physical Sciences, Second Edition, 1992, McGraw-Hill, ISBN
1118             0-07-91243-9.
1119              
1120             =back
1121              
1122             =cut
1123              
1124              
1125              
1126              
1127              
1128              
1129              
1130              
1131              
1132              
1133             #line 408 "lib/CXC/PDL/Bin1D.pd"
1134              
1135             =head1 BUGS AND LIMITATIONS
1136              
1137             No bugs have been reported.
1138              
1139             =head1 AUTHOR
1140              
1141             Diab Jerius, Edjerius@cpan.orgE
1142              
1143             =head1 COPYRIGHT AND LICENSE
1144              
1145             Copyright 2008 Smithsonian Astrophysical Observatory
1146              
1147             CXC::PDL::Bin1D is free software: you can redistribute it and/or modify
1148             it under the terms of the GNU General Public License as published by
1149             the Free Software Foundation, either version 3 of the License, or (at
1150             your option) any later version.
1151              
1152             This program is distributed in the hope that it will be useful,
1153             but without any warranty; without even the implied warranty of
1154             merchantability or fitness for a particular purpose. See the
1155             GNU General Public License for more details.
1156              
1157             You should have received a copy of the GNU General Public License
1158             along with this program. If not, see L.
1159              
1160             =cut
1161             #line 1162 "Bin1D.pm"
1162              
1163             # Exit with OK status
1164              
1165             1;