| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Algorithm::CurveFit::Simple; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# ABSTRACT: Convenience wrapper around Algorithm::CurveFit. |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
our $VERSION = '1.03'; # VERSION 1.03 |
|
6
|
|
|
|
|
|
|
|
|
7
|
5
|
|
|
5
|
|
562395
|
use strict; |
|
|
5
|
|
|
|
|
21
|
|
|
|
5
|
|
|
|
|
111
|
|
|
8
|
5
|
|
|
5
|
|
20
|
use warnings; |
|
|
5
|
|
|
|
|
8
|
|
|
|
5
|
|
|
|
|
120
|
|
|
9
|
5
|
|
|
5
|
|
1876
|
use Algorithm::CurveFit; |
|
|
5
|
|
|
|
|
541876
|
|
|
|
5
|
|
|
|
|
205
|
|
|
10
|
5
|
|
|
5
|
|
38
|
use Time::HiRes; |
|
|
5
|
|
|
|
|
8
|
|
|
|
5
|
|
|
|
|
42
|
|
|
11
|
5
|
|
|
5
|
|
406
|
use JSON::PP; |
|
|
5
|
|
|
|
|
8
|
|
|
|
5
|
|
|
|
|
423
|
|
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
our %STATS_H; # side-products of fit() stored here for profiling purposes |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
BEGIN { |
|
16
|
5
|
|
|
5
|
|
24
|
require Exporter; |
|
17
|
5
|
|
|
|
|
9
|
our $VERSION = '1.03'; |
|
18
|
5
|
|
|
|
|
90
|
our @ISA = qw(Exporter); |
|
19
|
5
|
|
|
|
|
8866
|
our @EXPORT_OK = qw(fit %STATS_H); |
|
20
|
|
|
|
|
|
|
} |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# fit() - only public function for this distribution |
|
23
|
|
|
|
|
|
|
# Given at least parameter "xy", generate a best-fit curve within a time limit. |
|
24
|
|
|
|
|
|
|
# Output: max deviation, avg deviation, implementation source string (perl or C, for now). |
|
25
|
|
|
|
|
|
|
# Optional parameters and their defaults: |
|
26
|
|
|
|
|
|
|
# terms => 3 # number of terms in formula, max is 10 |
|
27
|
|
|
|
|
|
|
# time_limit => 3 # number of seconds to try for better fit |
|
28
|
|
|
|
|
|
|
# inv => 1 # invert sense of curve-fit, from x->y to y->x |
|
29
|
|
|
|
|
|
|
# impl_lang => 'perl' # programming language used for output implementation: perl, c |
|
30
|
|
|
|
|
|
|
# impl_name => 'x2y' # name given to output implementation function |
|
31
|
|
|
|
|
|
|
sub fit { |
|
32
|
1
|
|
|
1
|
1
|
79
|
my %p = @_; |
|
33
|
|
|
|
|
|
|
|
|
34
|
1
|
|
|
|
|
13
|
my $formula = _init_formula(%p); |
|
35
|
1
|
|
|
|
|
5
|
my ($xdata, $ydata) = _init_data(%p); |
|
36
|
1
|
|
|
|
|
4
|
my $parameters = _init_parameters($xdata, $ydata, %p); |
|
37
|
|
|
|
|
|
|
|
|
38
|
1
|
|
|
|
|
1
|
my $iter_mode = 'time'; |
|
39
|
1
|
|
|
|
|
2
|
my $time_limit = 3; # sane default? |
|
40
|
1
|
50
|
|
|
|
2
|
$time_limit = 0.01 if ($time_limit < 0.01); |
|
41
|
1
|
|
|
|
|
2
|
my $n_iter; |
|
42
|
1
|
50
|
|
|
|
3
|
if (defined($p{iterations})) { |
|
43
|
0
|
|
|
|
|
0
|
$iter_mode = 'iter'; |
|
44
|
0
|
|
0
|
|
|
0
|
$n_iter = $p{iterations} || 10000; |
|
45
|
|
|
|
|
|
|
} else { |
|
46
|
1
|
|
33
|
|
|
4
|
$time_limit = $p{time_limit} // $time_limit; |
|
47
|
1
|
|
|
|
|
2
|
$n_iter = 10000 * $time_limit; # will use this to figure out how long it -really- takes. |
|
48
|
|
|
|
|
|
|
} |
|
49
|
|
|
|
|
|
|
|
|
50
|
1
|
|
|
|
|
2
|
my ($n_sec, $params_ar_ar); |
|
51
|
1
|
50
|
|
|
|
3
|
if ($iter_mode eq 'time') { |
|
52
|
1
|
|
|
|
|
4
|
($n_sec, $params_ar_ar) = _try_fit($formula, $parameters, $xdata, $ydata, $n_iter, $p{fitter_class}); |
|
53
|
1
|
|
|
|
|
4
|
$STATS_H{iter_mode} = $iter_mode; |
|
54
|
1
|
|
|
|
|
3
|
$STATS_H{fit_calib_iter} = $n_iter; |
|
55
|
1
|
|
|
|
|
2
|
$STATS_H{fit_calib_time} = $n_sec; |
|
56
|
1
|
|
|
|
|
2
|
$STATS_H{fit_calib_parar} = $params_ar_ar; |
|
57
|
1
|
|
|
|
|
3
|
$n_iter = int(($time_limit / $n_sec) * $n_iter + 1); |
|
58
|
|
|
|
|
|
|
} |
|
59
|
|
|
|
|
|
|
|
|
60
|
1
|
|
|
|
|
5
|
($n_sec, $params_ar_ar) = _try_fit($formula, $parameters, $xdata, $ydata, $n_iter, $p{fitter_class}); |
|
61
|
1
|
|
|
|
|
4
|
$STATS_H{fit_iter} = $n_iter; |
|
62
|
1
|
|
|
|
|
3
|
$STATS_H{fit_time} = $n_sec; |
|
63
|
1
|
|
|
|
|
2
|
$STATS_H{fit_parar} = $params_ar_ar; |
|
64
|
|
|
|
|
|
|
|
|
65
|
1
|
|
|
|
|
5
|
my $coderef = _implement_formula($params_ar_ar, "coderef", "", $xdata, \%p); |
|
66
|
1
|
|
|
|
|
4
|
my ($max_dev, $avg_dev) = _calculate_deviation($coderef, $xdata, $ydata); |
|
67
|
1
|
|
50
|
|
|
4
|
my $impl_lang = $p{impl_lang} // 'perl'; |
|
68
|
1
|
|
|
|
|
3
|
$impl_lang = lc($impl_lang); |
|
69
|
1
|
50
|
|
|
|
3
|
my $impl_name = $p{inv} ? "y2x" : "x2y"; |
|
70
|
1
|
|
33
|
|
|
6
|
$impl_name = $p{impl_name} // $impl_name; |
|
71
|
1
|
|
|
|
|
2
|
my $impl = $coderef; |
|
72
|
1
|
50
|
|
|
|
5
|
$impl = _implement_formula($params_ar_ar, $impl_lang, $impl_name, $xdata, \%p) unless($impl_lang eq 'coderef'); |
|
73
|
1
|
|
|
|
|
17
|
return ($max_dev, $avg_dev, $impl); |
|
74
|
|
|
|
|
|
|
} |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# ($n_sec, $params_ar_ar) = _try_fit($formula, $parameters, $xdata, $ydata, $n_iter, $p{fitter_class}); |
|
77
|
|
|
|
|
|
|
sub _try_fit { |
|
78
|
2
|
|
|
2
|
|
9
|
my ($formula, $parameters, $xdata, $ydata, $n_iter, $fitter_class) = @_; |
|
79
|
2
|
|
50
|
|
|
18
|
$fitter_class //= "Algorithm::CurveFit"; |
|
80
|
2
|
|
|
|
|
4
|
my $params_ar_ar = [map {[@$_]} @$parameters]; # making a copy because curve_fit() is destructive |
|
|
8
|
|
|
|
|
15
|
|
|
81
|
2
|
|
|
|
|
6
|
my $tm0 = Time::HiRes::time(); |
|
82
|
2
|
|
|
|
|
16
|
my $res = $fitter_class->curve_fit( |
|
83
|
|
|
|
|
|
|
formula => $formula, |
|
84
|
|
|
|
|
|
|
params => $params_ar_ar, |
|
85
|
|
|
|
|
|
|
variable => 'x', |
|
86
|
|
|
|
|
|
|
xdata => $xdata, |
|
87
|
|
|
|
|
|
|
ydata => $ydata, |
|
88
|
|
|
|
|
|
|
maximum_iterations => $n_iter |
|
89
|
|
|
|
|
|
|
); |
|
90
|
2
|
|
|
|
|
203406
|
my $tm_elapsed = Time::HiRes::time() - $tm0; |
|
91
|
2
|
|
|
|
|
9
|
return ($tm_elapsed, $params_ar_ar); |
|
92
|
|
|
|
|
|
|
} |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
sub _init_formula { |
|
95
|
11
|
|
|
11
|
|
10092
|
my %p = @_; |
|
96
|
11
|
|
|
|
|
17
|
my $formula = 'k + a*x + b*x^2 + c*x^3'; # sane'ish default |
|
97
|
11
|
|
100
|
|
|
41
|
my $terms = $p{terms} // 3; |
|
98
|
11
|
50
|
|
|
|
20
|
die "maximum of 10 terms\n" if ($terms > 10); |
|
99
|
11
|
100
|
|
|
|
20
|
if ($terms != 3) { |
|
100
|
8
|
|
|
|
|
10
|
$formula = 'k'; |
|
101
|
8
|
|
|
|
|
14
|
for (my $i = 1; $i <= $terms; $i++) { |
|
102
|
42
|
|
|
|
|
44
|
my $fact = chr(ord('a') + $i - 1); |
|
103
|
42
|
|
|
|
|
79
|
$formula .= " + $fact * x^$i"; |
|
104
|
|
|
|
|
|
|
} |
|
105
|
|
|
|
|
|
|
} |
|
106
|
11
|
|
|
|
|
48
|
return $formula; |
|
107
|
|
|
|
|
|
|
} |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# ($xdata, $ydata) = _init_data(%p); |
|
110
|
|
|
|
|
|
|
sub _init_data { |
|
111
|
15
|
|
|
15
|
|
10628
|
my %p = @_; |
|
112
|
15
|
|
|
|
|
24
|
my ($xdata, $ydata); |
|
113
|
15
|
100
|
100
|
|
|
66
|
if (defined($p{xydata})) { |
|
|
|
100
|
|
|
|
|
|
|
114
|
7
|
|
|
|
|
9
|
my $xy = $p{xydata}; |
|
115
|
7
|
50
|
33
|
|
|
54
|
unless ( |
|
|
|
|
33
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
116
|
|
|
|
|
|
|
ref($xy) eq 'ARRAY' |
|
117
|
|
|
|
|
|
|
&& @$xy >= 2 |
|
118
|
|
|
|
|
|
|
&& ref($xy->[0]) eq 'ARRAY' |
|
119
|
|
|
|
|
|
|
&& ref($xy->[1]) eq 'ARRAY' |
|
120
|
|
|
|
|
|
|
) { |
|
121
|
0
|
|
|
|
|
0
|
die "xydata must be either an arrayref of [x, y] data point arrayrefs or an arrayref [[x0, x1, ... xN], [y0, y1, ... yN]]\n"; |
|
122
|
|
|
|
|
|
|
} |
|
123
|
7
|
100
|
100
|
|
|
17
|
if (@$xy == 2 && @{$xy->[0]} > 2) { |
|
|
3
|
|
|
|
|
7
|
|
|
124
|
|
|
|
|
|
|
# user has provided [[x, ..], [y, ..]] |
|
125
|
2
|
|
|
|
|
2
|
$xdata = $xy->[0]; |
|
126
|
2
|
|
|
|
|
3
|
$ydata = $xy->[1]; |
|
127
|
|
|
|
|
|
|
} else { |
|
128
|
|
|
|
|
|
|
# user has provided [[x, y], [x, y], ..] |
|
129
|
5
|
100
|
|
|
|
6
|
die "pairwise xydata must have two data points per element\n" unless(@{$xy->[0]} == 2); |
|
|
5
|
|
|
|
|
16
|
|
|
130
|
4
|
|
|
|
|
6
|
$xdata = [map {$_->[0]} @{$xy}]; |
|
|
21
|
|
|
|
|
26
|
|
|
|
4
|
|
|
|
|
7
|
|
|
131
|
4
|
|
|
|
|
6
|
$ydata = [map {$_->[1]} @{$xy}]; |
|
|
21
|
|
|
|
|
21
|
|
|
|
4
|
|
|
|
|
5
|
|
|
132
|
|
|
|
|
|
|
} |
|
133
|
|
|
|
|
|
|
} |
|
134
|
|
|
|
|
|
|
elsif (defined($p{xdata}) && defined($p{ydata})) { |
|
135
|
5
|
|
|
|
|
8
|
$xdata = $p{xdata}; |
|
136
|
5
|
|
|
|
|
4
|
$ydata = $p{ydata}; |
|
137
|
|
|
|
|
|
|
} |
|
138
|
|
|
|
|
|
|
else { |
|
139
|
3
|
|
|
|
|
14
|
die "must provide at least xydata or both xdata and ydata\n"; |
|
140
|
|
|
|
|
|
|
} |
|
141
|
11
|
50
|
33
|
|
|
40
|
die "xdata and ydata must both be arrayref\n" unless (ref($xdata) eq "ARRAY" && ref($ydata) eq "ARRAY"); |
|
142
|
11
|
100
|
|
|
|
31
|
die "xdata and ydata must have the same number of elements\n" unless (@$xdata == @$ydata); |
|
143
|
9
|
100
|
|
|
|
17
|
die "must have more than one data-point\n" unless (@$xdata > 1); |
|
144
|
8
|
100
|
|
|
|
15
|
if ($p{inv}) { |
|
145
|
3
|
|
|
|
|
5
|
$STATS_H{xdata} = $ydata; |
|
146
|
3
|
|
|
|
|
4
|
$STATS_H{ydata} = $xdata; |
|
147
|
3
|
|
|
|
|
18
|
return ($ydata, $xdata); |
|
148
|
|
|
|
|
|
|
} |
|
149
|
5
|
|
|
|
|
8
|
$STATS_H{xdata} = $xdata; |
|
150
|
5
|
|
|
|
|
7
|
$STATS_H{ydata} = $ydata; |
|
151
|
5
|
|
|
|
|
16
|
return ($xdata, $ydata); |
|
152
|
|
|
|
|
|
|
} |
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# $parameters = _init_parameters($xdata, $ydata, %p); |
|
155
|
|
|
|
|
|
|
sub _init_parameters { |
|
156
|
3
|
|
|
3
|
|
1207
|
my ($xdata, $ydata, %p) = @_; |
|
157
|
3
|
|
|
|
|
5
|
my $k = 0; |
|
158
|
3
|
|
|
|
|
4
|
my $n_points = @$xdata; |
|
159
|
3
|
|
|
|
|
6
|
foreach my $v (@$ydata) { $k += $v; } |
|
|
18
|
|
|
|
|
18
|
|
|
160
|
3
|
|
|
|
|
5
|
$k /= $n_points; |
|
161
|
|
|
|
|
|
|
# zzapp -- implement any precision hints here. |
|
162
|
3
|
|
|
|
|
9
|
my @params = (['k', $k, 0.0000001]); |
|
163
|
3
|
|
100
|
|
|
11
|
my $terms = $p{terms} // 3; |
|
164
|
3
|
|
|
|
|
8
|
push @params, map {[chr(ord('a')+$_-1), 0.5, 0.0000001]} (1..$terms); |
|
|
8
|
|
|
|
|
18
|
|
|
165
|
3
|
|
|
|
|
10
|
return \@params; |
|
166
|
|
|
|
|
|
|
} |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
# $impl = _implement_formula($params_ar_ar, $impl_lang, $impl_name, $xdata, \%p) unless($impl_lang eq 'coderef'); |
|
169
|
|
|
|
|
|
|
sub _implement_formula { |
|
170
|
10
|
|
|
10
|
|
4609
|
my ($params_ar_ar, $impl_lang, $impl_name, $xdata, $opt_hr) = @_; |
|
171
|
10
|
100
|
|
|
|
28
|
return _implement_formula_as_coderef(@_) if ($impl_lang eq 'coderef'); |
|
172
|
|
|
|
|
|
|
# return _implement_formula_as_python(@_) if ($impl_lang eq 'python'); # zzapp |
|
173
|
8
|
100
|
|
|
|
18
|
return _implement_formula_as_C(@_) if ($impl_lang eq 'c'); |
|
174
|
|
|
|
|
|
|
# return _implement_formula_as_R(@_) if ($impl_lang eq 'r'); # zzapp |
|
175
|
|
|
|
|
|
|
# return _implement_formula_as_MATLAB(@_) if ($impl_lang eq 'matlab'); # zzapp |
|
176
|
5
|
|
|
|
|
14
|
return _implement_formula_as_perl(@_); |
|
177
|
|
|
|
|
|
|
} |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
sub _implement_formula_as_coderef { |
|
180
|
2
|
|
|
2
|
|
6
|
my ($params_ar_ar, $impl_lang, $impl_name, $xdata, $opt_hr) = @_; |
|
181
|
2
|
|
|
|
|
4
|
my $k_ar = $params_ar_ar->[0]; |
|
182
|
2
|
|
|
|
|
15
|
my $formula = sprintf("%f", $k_ar->[1]); |
|
183
|
2
|
|
|
|
|
6
|
for (my $i = 1; defined($params_ar_ar->[$i]); $i++) { |
|
184
|
6
|
|
|
|
|
9
|
my $fact = $params_ar_ar->[$i]->[1]; |
|
185
|
6
|
100
|
|
|
|
14
|
my $pow = ($i == 1) ? "" : "**$i"; |
|
186
|
6
|
|
|
|
|
26
|
$formula .= sprintf(' + %f * $x%s', $fact, $pow); |
|
187
|
|
|
|
|
|
|
} |
|
188
|
2
|
|
|
|
|
4
|
$STATS_H{impl_formula} = $formula; |
|
189
|
2
|
|
|
|
|
5
|
my $bounder = ''; |
|
190
|
2
|
50
|
|
|
|
8
|
if ($opt_hr->{bounds_check}) { |
|
191
|
0
|
|
|
|
|
0
|
my ($high_x, $low_x) = ($xdata->[0], $xdata->[0]); |
|
192
|
0
|
|
|
|
|
0
|
foreach my $x (@$xdata) { |
|
193
|
0
|
0
|
|
|
|
0
|
$high_x = $x if ($high_x < $x); |
|
194
|
0
|
0
|
|
|
|
0
|
$low_x = $x if ($low_x > $x); |
|
195
|
|
|
|
|
|
|
} |
|
196
|
0
|
|
|
|
|
0
|
$bounder = 'die "x out of bounds (high)" if ($x > '.$high_x.'); die "x out of bounds (low)" if ($x < '.$low_x.');'; |
|
197
|
|
|
|
|
|
|
} |
|
198
|
2
|
|
|
|
|
3
|
my $rounder = ''; |
|
199
|
2
|
50
|
|
|
|
6
|
$rounder = '$y = int($y + 0.5);' if ($opt_hr->{round_result}); |
|
200
|
2
|
|
|
|
|
8
|
my $src = 'sub { my($x) = @_; '.$bounder.' my $y = '.$formula.'; '.$rounder.' return $y; }'; |
|
201
|
2
|
|
|
|
|
4
|
$STATS_H{impl_source} = $src; |
|
202
|
2
|
|
|
|
|
3
|
$STATS_H{impl_exception} = ''; |
|
203
|
2
|
|
|
|
|
195
|
my $coderef = eval($src); |
|
204
|
2
|
50
|
|
|
|
8
|
$STATS_H{impl_exception} = $@ unless(defined($coderef)); |
|
205
|
2
|
|
|
|
|
7
|
return $coderef; |
|
206
|
|
|
|
|
|
|
} |
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
sub _implement_formula_as_perl { |
|
209
|
5
|
|
|
5
|
|
8
|
my ($params_ar_ar, $impl_lang, $impl_name, $xdata, $opt_hr) = @_; |
|
210
|
5
|
|
|
|
|
9
|
my $k_ar = $params_ar_ar->[0]; |
|
211
|
5
|
|
|
|
|
31
|
my $formula = sprintf("%.11f", $k_ar->[1]); |
|
212
|
5
|
|
|
|
|
14
|
for (my $i = 1; defined($params_ar_ar->[$i]); $i++) { |
|
213
|
15
|
|
|
|
|
17
|
my $fact = $params_ar_ar->[$i]->[1]; |
|
214
|
15
|
100
|
|
|
|
31
|
my $pow = ($i == 1) ? "" : "**$i"; |
|
215
|
15
|
|
|
|
|
54
|
$formula .= sprintf(' + %.11f * $x%s', $fact, $pow); |
|
216
|
|
|
|
|
|
|
} |
|
217
|
5
|
|
|
|
|
18
|
$STATS_H{impl_formula} = $formula; |
|
218
|
5
|
|
|
|
|
6
|
my $bounder = ''; |
|
219
|
5
|
100
|
|
|
|
12
|
if ($opt_hr->{bounds_check}) { |
|
220
|
1
|
|
|
|
|
3
|
my ($high_x, $low_x) = ($xdata->[0], $xdata->[0]); |
|
221
|
1
|
|
|
|
|
2
|
foreach my $x (@$xdata) { |
|
222
|
7
|
100
|
|
|
|
9
|
$high_x = $x if ($high_x < $x); |
|
223
|
7
|
100
|
|
|
|
10
|
$low_x = $x if ($low_x > $x); |
|
224
|
|
|
|
|
|
|
} |
|
225
|
1
|
|
|
|
|
7
|
$bounder = sprintf(' die "x out of bounds (high)" if ($x > %.11f);'."\n", $high_x) . |
|
226
|
|
|
|
|
|
|
sprintf(' die "x out of bounds (low)" if ($x < %.11f);'."\n", $low_x); |
|
227
|
|
|
|
|
|
|
} |
|
228
|
5
|
|
|
|
|
8
|
my $rounder = ''; |
|
229
|
5
|
100
|
|
|
|
14
|
$rounder = ' $y = int($y + 0.5);'."\n" if ($opt_hr->{round_result}); |
|
230
|
5
|
|
|
|
|
21
|
my $src = join("\n",( |
|
231
|
|
|
|
|
|
|
"sub $impl_name {", |
|
232
|
|
|
|
|
|
|
' my($x) = @_;', |
|
233
|
|
|
|
|
|
|
$bounder, |
|
234
|
|
|
|
|
|
|
' my $y = '.$formula.';', |
|
235
|
|
|
|
|
|
|
$rounder, |
|
236
|
|
|
|
|
|
|
' return $y;', |
|
237
|
|
|
|
|
|
|
'}' |
|
238
|
|
|
|
|
|
|
)); |
|
239
|
5
|
|
|
|
|
8
|
$STATS_H{impl_source} = $src; |
|
240
|
5
|
|
|
|
|
7
|
$STATS_H{impl_exception} = ''; |
|
241
|
5
|
|
|
|
|
14
|
return $src; |
|
242
|
|
|
|
|
|
|
} |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
sub _implement_formula_as_C { |
|
245
|
3
|
|
|
3
|
|
18
|
my ($params_ar_ar, $impl_lang, $impl_name, $xdata, $opt_hr) = @_; |
|
246
|
3
|
|
|
|
|
5
|
my $k_ar = $params_ar_ar->[0]; |
|
247
|
3
|
|
|
|
|
3
|
my $src = ""; |
|
248
|
3
|
100
|
66
|
|
|
10
|
$src .= "#include \n" if ($opt_hr->{round_result} && !$opt_hr->{suppress_includes}); |
|
249
|
3
|
|
|
|
|
6
|
$src .= "double $impl_name(double x) {\n"; |
|
250
|
3
|
|
|
|
|
16
|
$src .= sprintf(" double y = %.11f;\n", $k_ar->[1]); |
|
251
|
3
|
|
|
|
|
5
|
$src .= " double xx = x;\n"; # eliminating pow() calls, which gcc doesn't seem willing to optimize completely away |
|
252
|
|
|
|
|
|
|
|
|
253
|
3
|
100
|
|
|
|
7
|
if ($opt_hr->{bounds_check}) { |
|
254
|
1
|
|
|
|
|
2
|
my ($high_x, $low_x) = ($xdata->[0], $xdata->[0]); |
|
255
|
1
|
|
|
|
|
3
|
foreach my $x (@$xdata) { |
|
256
|
7
|
100
|
|
|
|
11
|
$high_x = $x if ($high_x < $x); |
|
257
|
7
|
100
|
|
|
|
8
|
$low_x = $x if ($low_x > $x); |
|
258
|
|
|
|
|
|
|
} |
|
259
|
|
|
|
|
|
|
# zzapp -- this is kludgy. better way to signal bounds violation? |
|
260
|
1
|
|
|
|
|
8
|
$src .= sprintf(" if (x > %.11f) return -1.0;\n", $high_x) . |
|
261
|
|
|
|
|
|
|
sprintf(" if (x < %.11f) return -1.0;\n", $low_x); |
|
262
|
|
|
|
|
|
|
} |
|
263
|
|
|
|
|
|
|
|
|
264
|
3
|
|
|
|
|
4
|
my $formula = ""; |
|
265
|
3
|
|
|
|
|
8
|
for (my $i = 1; defined($params_ar_ar->[$i]); $i++) { |
|
266
|
9
|
|
|
|
|
10
|
my $fact = $params_ar_ar->[$i]->[1]; |
|
267
|
9
|
|
|
|
|
21
|
$formula .= sprintf(" y += %.11f * xx;\n", $fact); |
|
268
|
9
|
100
|
|
|
|
23
|
$formula .= " xx *= x;\n" if(defined($params_ar_ar->[$i+1])); |
|
269
|
|
|
|
|
|
|
} |
|
270
|
3
|
|
|
|
|
4
|
$STATS_H{impl_formula} = $formula; # zzapp -- not clean! |
|
271
|
|
|
|
|
|
|
|
|
272
|
3
|
|
|
|
|
5
|
$src .= $formula; |
|
273
|
3
|
100
|
|
|
|
7
|
$src .= " y = round(y);\n" if ($opt_hr->{round_result}); |
|
274
|
3
|
|
|
|
|
4
|
$src .= " return y;\n}\n"; |
|
275
|
3
|
|
|
|
|
3
|
$STATS_H{impl_source} = $src; |
|
276
|
3
|
|
|
|
|
4
|
$STATS_H{impl_exception} = ''; |
|
277
|
3
|
|
|
|
|
8
|
return $src; |
|
278
|
|
|
|
|
|
|
} |
|
279
|
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
# ($max_dev, $avg_dev) = _calculate_deviation($coderef, $xdata, $ydata); |
|
281
|
|
|
|
|
|
|
sub _calculate_deviation { |
|
282
|
1
|
|
|
1
|
|
2
|
my ($coderef, $xdata, $ydata) = @_; |
|
283
|
1
|
|
|
|
|
2
|
my $max_off = 1.0; |
|
284
|
1
|
|
|
|
|
2
|
my $max_off_datum = 0.0; |
|
285
|
1
|
|
|
|
|
2
|
my $tot_off = 0.0; |
|
286
|
1
|
|
|
|
|
8
|
for (my $i = 0; defined($xdata->[$i]); $i++) { |
|
287
|
10
|
|
|
|
|
9
|
my $x = $xdata->[$i]; |
|
288
|
10
|
|
|
|
|
8
|
my $y = eval { $coderef->($x); }; |
|
|
10
|
|
|
|
|
142
|
|
|
289
|
10
|
50
|
|
|
|
39
|
unless(defined($y)) { |
|
290
|
0
|
|
|
|
|
0
|
$STATS_H{deviation_exception} = $@; |
|
291
|
0
|
|
|
|
|
0
|
$STATS_H{deviation_exception_datum} = $x; |
|
292
|
0
|
|
|
|
|
0
|
die "caught exception calculating deviations"; |
|
293
|
|
|
|
|
|
|
} |
|
294
|
|
|
|
|
|
|
|
|
295
|
10
|
|
|
|
|
14
|
my $observed_y = $ydata->[$i]; |
|
296
|
10
|
50
|
33
|
|
|
24
|
if ($observed_y && $y) { |
|
297
|
10
|
100
|
|
|
|
17
|
my $deviation = $y > $observed_y ? $y / $observed_y : $observed_y / $y; |
|
298
|
10
|
|
|
|
|
11
|
my $dev_mag = abs($deviation - 1.0); |
|
299
|
10
|
|
|
|
|
11
|
my $max_mag = abs($max_off - 1.0); |
|
300
|
|
|
|
|
|
|
# print "x=$x\ty=$y\toy=$observed_y\tdev_mag=$dev_mag\tmax_mag=$max_mag\n"; |
|
301
|
10
|
100
|
|
|
|
15
|
($max_off, $max_off_datum) = ($deviation, $x) if ($dev_mag > $max_mag); |
|
302
|
10
|
|
|
|
|
19
|
$tot_off += $deviation; |
|
303
|
|
|
|
|
|
|
} else { |
|
304
|
0
|
|
|
|
|
0
|
$tot_off += 1.0; |
|
305
|
|
|
|
|
|
|
} |
|
306
|
|
|
|
|
|
|
} |
|
307
|
1
|
|
|
|
|
3
|
$STATS_H{deviation_max_offset_datum} = $max_off_datum; |
|
308
|
1
|
|
|
|
|
4
|
return ($max_off, $tot_off / @$xdata); |
|
309
|
|
|
|
|
|
|
} |
|
310
|
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
1; |
|
313
|
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
=head1 NAME |
|
315
|
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
Algorithm::CurveFit::Simple - Convenience wrapper around Algorithm::CurveFit |
|
317
|
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
use Algorithm::CurveFit::Simple qw(fit); |
|
321
|
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
my ($max_dev, $avg_dev, $src) = fit(xdata => \@xdata, ydata => \@ydata, ..options..); |
|
323
|
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
# Alternatively pass xdata and ydata together: |
|
325
|
|
|
|
|
|
|
my ($max_dev, $avg_dev, $src) = fit(xydata => [\@xdata, \@ydata], ..options..); |
|
326
|
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
# Alternatively pass data as array of [x,y] pairs: |
|
328
|
|
|
|
|
|
|
my ($max_dev, $avg_dev, $src) = fit(xydata => [[1, 2], [2, 5], [3, 10]], ..options..); |
|
329
|
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
331
|
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
This is a convenience wrapper around L. Given a body of (x, y) data points, it will generate a polynomial formula f(x) = y which fits that data. |
|
333
|
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
Its main differences from L are: |
|
335
|
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
=over 4 |
|
337
|
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
=item * It synthesizes the initial formula for you, |
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
=item * It allows for a time limit on the curve-fit instead of an iteration count, |
|
341
|
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
=item * It implements the formula as source code (or as a perl coderef, if you want to use the formula immediately in your program). |
|
343
|
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
=back |
|
345
|
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
Additionally it returns a maximum deviation and average deviation of the formula vs the xydata, which is more useful (to me, at least) than L's square residual output. Closer to 1.0 indicates a better fit. Play with C #> until these deviations are as close to 1.0 as possible, and beware overfitting. |
|
347
|
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
=head1 SUBROUTINES |
|
349
|
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
There is only one public subroutine, C. It B be given either C or C and C parameters. All other paramters are optional. |
|
351
|
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
It returns three values: A maximum deviation, the average deviation and the formula implementation. |
|
353
|
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
=head2 Options |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
=over 4 |
|
357
|
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
=item C \@xdata, ydata =E \@ydata)> |
|
359
|
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
The data points the formula will fit. Same as L parameters of the same name. |
|
361
|
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
=item C [[1, 2, 3, 4], [10, 17, 26, 37]])> |
|
363
|
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
=item C [[1, 10], [2, 17], [3, 26], [4, 37]])> |
|
365
|
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
A more convenient way to provide data points. C will try to detect how the data points are organized -- list of x and list of y, or list of [x,y]. |
|
367
|
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
=item C 3)> |
|
369
|
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
Sets the order of the polynomial, which will be of the form C. The default is 3 and the limit is 10. |
|
371
|
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
There is no need to specify initial C. It will be calculated from C. |
|
373
|
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
=item C 3)> |
|
375
|
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
If a time limit is given (in seconds), C will spend no more than that long trying to fit the data. It may return in much less time. The default is 3. |
|
377
|
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
=item C 10000)> |
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
If an iteration count is given, C will ignore any time limit and iterate up to C times trying to fit the curve. Same as L parameter of the same name. |
|
381
|
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
=item C 1)> |
|
383
|
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
Setting C inverts the sense of the fit. Instead of C the formula will fit C. |
|
385
|
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
=item C "perl")> |
|
387
|
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
Sets the programming language in which the formula will be implemented. Currently supported languages are C<"C">, C<"coderef"> and the default, C<"perl">. |
|
389
|
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
When C "coderef"> is specified, a code reference is returned instead which may be used immediately by your perl script: |
|
391
|
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
my($max_dev, $avg_dev, $x2y) = fit(xydata => \@xy, impl_lang => "coderef"); |
|
393
|
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
my $y = $x2y->(42); |
|
395
|
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
More implementation languages will be supported in the future. |
|
397
|
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
=item C "x2y")> |
|
399
|
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
Sets the name of the function implementing the formula. The default is C<"x2y">. Has no effect when used with C "coderef")>. |
|
401
|
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
my($max_dev, $avg_dev, $src) = fit(xydata => \@xy, impl_name => "converto"); |
|
403
|
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
print "$src\n"; |
|
405
|
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
sub converto { |
|
407
|
|
|
|
|
|
|
my($x) = @_; |
|
408
|
|
|
|
|
|
|
my $y = -5340.93059104837 + 249.23009968947 * $x + -3.87745746448 * $x**2 + 0.02114780993 * $x**3; |
|
409
|
|
|
|
|
|
|
return $y; |
|
410
|
|
|
|
|
|
|
} |
|
411
|
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
=item C 1)> |
|
413
|
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
When set, the implementation will include logic for checking whether the input is out-of-bounds, per the highest and lowest x points in the data used to fit the formula. For implementation languages which support exceptions, an exception will be thrown. For others (like C), C<-1.0> will be returned to indicate the error. |
|
415
|
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
For instance, if the highest x in C<$xydata> is 83.0 and the lowest x is 60.0: |
|
417
|
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
my($max_dev, $avg_dev, $src) = fit(xydata => \@xy, bounds_check => 1); |
|
419
|
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
print "$src\n"; |
|
421
|
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
sub x2y { |
|
423
|
|
|
|
|
|
|
my($x) = @_; |
|
424
|
|
|
|
|
|
|
die "x out of bounds (high)" if ($x > 83.80000000000); |
|
425
|
|
|
|
|
|
|
die "x out of bounds (low)" if ($x < 60.80000000000); |
|
426
|
|
|
|
|
|
|
my $y = -5340.93059104837 + 249.23009968947 * $x + -3.87745746448 * $x**2 + 0.02114780993 * $x**3; |
|
427
|
|
|
|
|
|
|
return $y; |
|
428
|
|
|
|
|
|
|
} |
|
429
|
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
=item C 1)> |
|
431
|
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
When set, the implementation will round the output to the nearest whole number. When the implementation language is C<"C"> this adds an C<#include Emath.hE> directive to the source code, which will have to be compiled against libm -- see C. |
|
433
|
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
my($max_dev, $avg_dev, $src) = fit(xydata => \@xy, round_result => 1); |
|
435
|
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
print "$src\n"; |
|
437
|
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
sub x2y { |
|
439
|
|
|
|
|
|
|
my($x) = @_; |
|
440
|
|
|
|
|
|
|
my $y = -5340.93059104837 + 249.23009968947 * $x + -3.87745746448 * $x**2 + 0.02114780993 * $x**3; |
|
441
|
|
|
|
|
|
|
$y = int($y + 0.5); |
|
442
|
|
|
|
|
|
|
return $y; |
|
443
|
|
|
|
|
|
|
} |
|
444
|
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
=item C 1)> |
|
446
|
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
When set and C "C">, any C<#include> directives which the implementation might need will be suppressed. |
|
448
|
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
=back |
|
450
|
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
=head1 VARIABLES |
|
452
|
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
The class variable C<%STATS_H> contains various intermediate values which might be helpful. For instance, C<$STATS_H{deviation_max_offset_datum}> contains the x data point which corresponds to the maximum deviation returned. |
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
The contents of C<%STATS_H> is subject to change and might not be fully documented in future versions. The current fields are: |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=over 4 |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=item C: The x data point corresponding with returned maximum deviation. |
|
460
|
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=item C: Arrayref of formula parameters as returned by L after a short fitting attempt used for timing calibration. |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
=item C: The number of seconds L spent in the calibration run. |
|
464
|
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
=item C: The iterations parameter passed to L. |
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
=item C: Arrayref of formula parameters as returned by L. |
|
468
|
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
=item C: The number of seconds L actually spent fitting the formula. |
|
470
|
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
=item C: The exception thrown when the implementation was used to calculate the deviations, or the empty string if none. |
|
472
|
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
=item C: The formula part of the implementation. |
|
474
|
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
=item C: The implementation source string. |
|
476
|
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
=item C: One of C<"time"> or C<"iter">, indicating whether a time limit was used or an iteration count. |
|
478
|
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
=item C: Arrayref of x data points as passed to L. |
|
480
|
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
=item C: Arrayref of y data points as passed to L. |
|
482
|
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
=back |
|
484
|
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
=head1 CAVEATS |
|
486
|
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
=over 4 |
|
488
|
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
=item * Only simple polynomial functions are supported. Sometimes you need something else. Use L for such cases. |
|
490
|
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
=item * If C is very large, iterating over it to calculate deviances can take more time than permitted by C. |
|
492
|
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
=item * The dangers of overfitting are real! L |
|
494
|
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
=item * Using too many terms can dramatically reduce the accuracy of the fitted formula. |
|
496
|
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
=item * Sometimes calling L with a ten-term polynomial causes it to hang. |
|
498
|
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
=back |
|
500
|
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
=head1 TO DO |
|
502
|
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
=over 4 |
|
504
|
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
=item * Support more programming languages for formula implementation: R, MATLAB, python |
|
506
|
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
=item * Calculate the actual term sigfigs and set precision appropriately in the formula implementation instead of just "%.11f". |
|
508
|
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
=item * Support trying a range of terms and returning whatever gives the best fit. |
|
510
|
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
=item * Support piecewise output formulas. |
|
512
|
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
=item * Work around L's occasional hang problem when using ten-term polynomials. |
|
514
|
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
=back |
|
516
|
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
518
|
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
L |
|
520
|
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
L |
|
522
|
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
=cut |