File Coverage

blib/lib/TimeSeries/AdaptiveFilter.pm
Criterion Covered Total %
statement 74 79 93.6
branch 14 20 70.0
condition 23 32 71.8
subroutine 6 6 100.0
pod 1 1 100.0
total 118 138 85.5


line stmt bran cond sub pod time code
1             package TimeSeries::AdaptiveFilter;
2 1     1   22962 use strict;
  1         2  
  1         21  
3 1     1   3 use warnings;
  1         1  
  1         40  
4              
5             =head1 NAME
6              
7             TimeSeries::AdaptiveFilter - Adaptive filter for data stream with possible outliers
8              
9             =cut
10              
11             our $VERSION = '0.05';
12              
13             =head1 STATUS
14              
15             =begin HTML
16              
17            

18            
19            

20              
21             =end HTML
22              
23             =cut
24              
25 1     1   4 use List::Util qw(sum max);
  1         1  
  1         94  
26              
27 1     1   3 use Exporter qw/import/;
  1         1  
  1         578  
28              
29             our @EXPORT_OK = qw/filter/;
30              
31             =head1 SYNOPSYS
32              
33             use TimeSeries::AdaptiveFilter qw/filter/;
34              
35             # creation with defaults
36             my $filter = filter();
37              
38             # create filter with tuned parameters
39             my $filter = filter({
40             floor => 6
41             cap => 0.2,
42             lookback_capacity => 20,
43             lookback_period => 4,
44             decay_speeds => [0.03, 0.01, 0.003],
45             build_up_count => 5,
46             reject_criterium => 4,
47             });
48              
49             # usage
50             my $now = time;
51             $filter->($now, 100.002); # returns true, i.e. all data is valid on learning period
52             $filter->($now + 1, 100.001); # returns true
53             ... # it learns form sample of 60 seconds
54             $filter->($now + 60, 100.005); # returns true
55             $filter->($now + 61, 99.9995); # returns true, as value does not differs much
56             $filter->($now + 62, 10_0000); # returns false, outlier data
57             $filter->($now + 63, 10.0001); # returns false, outlier data
58             $filter->($now + 64, 100.011); # returns true, even if the sample is oulier, because
59             # the filter rejected too much values, and has to
60             # re-adapt to time seria again
61              
62             =head1 DESCRIPTION
63              
64             For the details of underlying mathematical model of the filter, configurable parameters
65             and their usage, please, look at the shipped C folder.
66              
67             =cut
68              
69             my $sqrt2pi = 1 / sqrt(2 * atan2(1, 1));
70              
71             =head1 FUNCTIONS
72              
73             =head2 filter
74              
75             =cut
76              
77             sub filter {
78 4     4 1 1328 my $params = shift;
79              
80             ##############################
81             ## filter tuning parameters ##
82             ##############################
83              
84             # minimum amount of values in lookback to take an decision.
85             # Otherwise, input values will be accepted
86 4   100     22 my $floor = $params->{floor} // 6;
87             # maximum share of rejected input values. Upon hit, input values will be accepted
88 4   50     16 my $cap = $params->{cap} // 0.2;
89             # maximum amount ot input values in lookback
90 4   50     17 my $lookback_capacity = $params->{lookback_capacity} // 20;
91             # the retention period for lookback, i.e. max age for input values
92 4   50     14 my $lookback_period = $params->{lookback_period} // 4;
93 4   50     19 my $decay_speeds = $params->{decay_speeds} // [0.03, 0.01, 0.003];
94 4   100     22 my $build_up_count = $params->{build_up_count} // 5;
95 4   50     12 my $reject_criterium = $params->{reject_criterium} // 4;
96              
97             ########################
98             ## enclosed variables ##
99             ########################
100              
101 4         4 my @lookback;
102 4         3 my $lookback_rejected = 0;
103 4         5 my @minute;
104             my $trust_w;
105 0         0 my $ad;
106 0         0 my @ads; # used on build stage only
107 0         0 my $mad;
108 0         0 my @mads;
109 4         2 my $wsum = 0;
110 4         5 my $csum = 0;
111 4         4 my $_accepted;
112              
113             # flag initicates, that we still need to accumulate enough time series
114             # before doing actual filtering.
115 4         4 my $build = 1;
116              
117             # resulting adaptive filter function
118             my $fn = sub {
119 213     213   51679 my ($epoch, $spot) = @_;
120              
121             # operating on nature log
122 213         307 $spot = log $spot;
123              
124             # prevent loopback window overgrow
125 213   100     791 while (@lookback > $lookback_capacity or @lookback > $floor and $lookback[0][0] < $epoch - $lookback_period) {
      66        
126 178         167 my $leaving = shift @lookback;
127 178 50       732 --$lookback_rejected unless $leaving->[3];
128             }
129 213         161 my $accepted;
130              
131 213 100       265 if ($build) {
132             # always accept the incoming value
133 173         141 ($trust_w, $accepted) = (1, 1);
134              
135             # gather absolute differences
136 173 100       231 unless (@lookback < $build_up_count) {
137 153         241 $ad = abs($spot - $lookback[-$build_up_count][1]) / sqrt($build_up_count);
138 153         170 push @ads, $ad;
139             }
140              
141             # build condition: the current tick is 60s newer
142             # and we have enough absolute differences
143 173 100 100     563 if ( @minute
      66        
144             and $minute[0] < $epoch - 60
145             and @ads >= $build_up_count)
146             {
147 1         2 $build = 0;
148 1         5 my @new_ads = sort { $a <=> $b } @ads;
  263         160  
149 1         3 my $cut = int(@new_ads / $build_up_count);
150 1         13 @new_ads = @new_ads[$cut .. ($build_up_count - 1) * $cut];
151 1         10 $mad = sum(@new_ads) / @new_ads;
152 1         3 @mads = ($mad) x scalar(@$decay_speeds);
153             }
154             } else {
155             # prevent @minute overgrow
156 40   66     127 while (@minute and $minute[0] < $epoch - 60) {
157 41         120 shift @minute;
158             }
159 40         50 my $density = 60 / (1 + @minute); # the number "1", beacuse we account the current value too
160 40         38 my $ha = $wsum / $csum;
161 40         32 my $vol = $mad / $sqrt2pi;
162 40         41 my $diff = abs($spot - $ha);
163              
164             # ther there is zero-difference, we accept current piece of data;
165             # otherwise, if it is zero-volatility (flat), we reject it.
166 40 50       60 my $reject =
    50          
167             $diff
168             ? ($vol ? ($diff / $vol) : $reject_criterium + 1)
169             : 0;
170              
171 40         37 $accepted = !($reject > $reject_criterium);
172 40         93 $trust_w = 1 / (1 + ($reject / $reject_criterium)**8);
173              
174 40 50 66     68 if (not $accepted and $lookback_rejected > $cap * @lookback) {
175 0         0 ($accepted, $trust_w) = (1, 1);
176             }
177 40         55 $ad = abs($spot - $_accepted->[-$build_up_count][1]) / sqrt($build_up_count);
178 40 50       53 if ($ad) {
179 40         61 for my $idx (0 .. @$decay_speeds - 1) {
180             # MAD (mean absolute deviation)
181 120         117 my $mu = $trust_w * (1 - exp(-$density * $decay_speeds->[$idx]));
182 120         132 $mads[$idx] = (1 - $mu) * $mads[$idx] + $mu * $ad;
183             }
184             }
185 40         73 $mad = max(@mads);
186             }
187 213         231 push @minute, $epoch;
188 213         318 push @lookback, [$epoch, $spot, $trust_w, $accepted];
189 213 100       212 if ($accepted) {
190 212         232 push @$_accepted, [$epoch, $spot];
191 212         477 shift @$_accepted while @$_accepted > $build_up_count;
192             } else {
193 1         1 ++$lookback_rejected;
194             }
195 213 50       231 if ($trust_w) {
196 213         235 $wsum = 0.5 * $wsum + $spot * $trust_w;
197 213         162 $csum = 0.5 * $csum + $trust_w;
198             }
199 213         1069 return $accepted;
200 4         32 };
201 4         15 return $fn;
202             }
203              
204             1;