File Coverage

blib/lib/TimeSeries/AdaptiveFilter.pm
Criterion Covered Total %
statement 74 79 93.6
branch 14 20 70.0
condition 23 32 71.8
subroutine 6 6 100.0
pod 0 1 0.0
total 117 138 84.7


line stmt bran cond sub pod time code
1             package TimeSeries::AdaptiveFilter;
2              
3             =head1 NAME
4              
5             TimeSeries::AdaptiveFilter - Adaptive filter for data stream with possible outliers
6              
7             =cut
8              
9             our $VERSION = '0.03';
10              
11             =head1 VERSION
12              
13             Version 0.03
14              
15             =head1 STATUS
16              
17             =begin HTML
18              
19            

20            
21            

22              
23             =end HTML
24              
25             =cut
26              
27 1     1   14010 use strict;
  1         1  
  1         21  
28 1     1   2 use warnings;
  1         1  
  1         24  
29              
30 1     1   3 use List::Util qw(sum max);
  1         3  
  1         77  
31              
32 1     1   3 use parent qw/Exporter/;
  1         2  
  1         6  
33              
34             our @EXPORT_OK = qw/filter/;
35              
36             =head1 SYNOPSYS
37              
38             use TimeSeries::AdaptiveFilter qw/filter/;
39              
40             # creation with defaults
41             my $filter = filter();
42              
43             # create filter with tuned parameters
44             my $filter = filter({
45             floor => 6
46             cap => 0.2,
47             lookback_capacity => 20,
48             lookback_period => 4,
49             decay_speeds => [0.03, 0.01, 0.003],
50             build_up_count => 5,
51             reject_criterium => 4,
52             });
53              
54             # usage
55             my $now = time;
56             $filter->($now, 100.002); # returns true, i.e. all data is valid on learning period
57             $filter->($now + 1, 100.001); # returns true
58             ... # it learns form sample of 60 seconds
59             $filter->($now + 60, 100.005); # returns true
60             $filter->($now + 61, 99.9995); # returns true, as value does not differs much
61             $filter->($now + 62, 10_0000); # returns false, outlier data
62             $filter->($now + 63, 10.0001); # returns false, outlier data
63             $filter->($now + 64, 100.011); # returns true, even if the sample is oulier, because
64             # the filter rejected too much values, and has to
65             # re-adapt to time seria again
66              
67             =head1 DESCRIPTION
68              
69             For the details of underlying mathematical model of the filter, configurable paramters
70             and their usage, please, look at the shipped C folder.
71              
72             =cut
73              
74             my $sqrt2pi = 1 / sqrt(2 * atan2(1, 1));
75              
76             sub filter {
77 4     4 0 949 my $params = shift;
78              
79             ##############################
80             ## filter tuning parameters ##
81             ##############################
82              
83             # minimum amount of values in lookback to take an decision.
84             # Otherwise, input values will be accepted
85 4   100     16 my $floor = $params->{floor} // 6;
86             # maximum share of rejected input values. Upon hit, input values will be accepted
87 4   50     11 my $cap = $params->{cap} // 0.2;
88             # maximum amount ot input values in lookback
89 4   50     11 my $lookback_capacity = $params->{lookback_capacity} // 20;
90             # the retention period for lookback, i.e. max age for input values
91 4   50     11 my $lookback_period = $params->{lookback_period} // 4;
92 4   50     14 my $decay_speeds = $params->{decay_speeds} // [0.03, 0.01, 0.003];
93 4   100     9 my $build_up_count = $params->{build_up_count} // 5;
94 4   50     11 my $reject_criterium = $params->{reject_criterium} // 4;
95              
96             ########################
97             ## enclosed variables ##
98             ########################
99              
100 4         4 my @lookback;
101 4         2 my $lookback_rejected = 0;
102 4         3 my @minute;
103             my $trust_w;
104 0         0 my $ad;
105 0         0 my @ads; # used on build stage only
106 0         0 my $mad;
107 0         0 my @mads;
108 4         3 my $wsum = 0;
109 4         2 my $csum = 0;
110 4         4 my $vol;
111             my $_accepted;
112              
113             # flag initicates, that we still need to accumulate enough time series
114             # before doing actual filtering.
115 4         3 my $build = 1;
116              
117             # resulting adaptive filter function
118             my $fn = sub {
119 213     213   35594 my ($epoch, $spot) = @_;
120              
121             # operating on nature log
122 213         281 $spot = log $spot;
123              
124             # prevent loopback window overgrow
125 213   100     808 while (@lookback > $lookback_capacity or @lookback > $floor and $lookback[0][0] < $epoch - $lookback_period) {
      66        
126 178         147 my $leaving = shift @lookback;
127 178 50       743 --$lookback_rejected unless $leaving->[3];
128             }
129 213         137 my $accepted;
130              
131 213 100       240 if ($build) {
132             # always accept the incoming value
133 173         138 ($trust_w, $accepted) = (1, 1);
134              
135             # gather absolute differences
136 173 100       224 unless (@lookback < $build_up_count) {
137 153         228 $ad = abs($spot - $lookback[-$build_up_count][1]) / sqrt($build_up_count);
138 153         159 push @ads, $ad;
139             }
140              
141             # build condition: the current tick is 60s newer
142             # and we have enough absolute differences
143 173 100 100     548 if ( @minute
      66        
144             and $minute[0] < $epoch - 60
145             and @ads >= $build_up_count)
146             {
147 1         1 $build = 0;
148 1         6 my @new_ads = sort { $a <=> $b } @ads;
  263         149  
149 1         3 my $cut = int(@new_ads / $build_up_count);
150 1         6 @new_ads = @new_ads[$cut .. ($build_up_count - 1) * $cut];
151 1         9 $mad = sum(@new_ads) / @new_ads;
152 1         3 @mads = ($mad) x scalar(@$decay_speeds);
153             }
154             } else {
155             # prevent @minute overgrow
156 40   66     118 while (@minute and $minute[0] < $epoch - 60) {
157 41         108 shift @minute;
158             }
159 40         45 my $density = 60 / (1 + @minute); # the number "1", beacuse we account the current value too
160 40         36 my $ha = $wsum / $csum;
161 40         44 my $vol = $mad / $sqrt2pi;
162 40         30 my $diff = abs($spot - $ha);
163              
164             # ther there is zero-difference, we accept current piece of data;
165             # otherwise, if it is zero-volatility (flat), we reject it.
166 40 50       58 my $reject =
    50          
167             $diff
168             ? ($vol ? ($diff / $vol) : $reject_criterium + 1)
169             : 0;
170              
171 40         35 $accepted = !($reject > $reject_criterium);
172 40         86 $trust_w = 1 / (1 + ($reject / $reject_criterium)**8);
173              
174 40 50 66     59 if (not $accepted and $lookback_rejected > $cap * @lookback) {
175 0         0 ($accepted, $trust_w) = (1, 1);
176             }
177 40         62 $ad = abs($spot - $_accepted->[-$build_up_count][1]) / sqrt($build_up_count);
178 40 50       51 if ($ad) {
179 40         59 for my $idx (0 .. @$decay_speeds - 1) {
180             # MAD (mean absolute deviation)
181 120         111 my $mu = $trust_w * (1 - exp(-$density * $decay_speeds->[$idx]));
182 120         127 $mads[$idx] = (1 - $mu) * $mads[$idx] + $mu * $ad;
183             }
184             }
185 40         66 $mad = max(@mads);
186             }
187 213         181 push @minute, $epoch;
188 213         300 push @lookback, [$epoch, $spot, $trust_w, $accepted];
189 213 100       213 if ($accepted) {
190 212         213 push @$_accepted, [$epoch, $spot];
191 212         510 shift @$_accepted while @$_accepted > $build_up_count;
192             } else {
193 1         2 ++$lookback_rejected;
194             }
195 213 50       259 if ($trust_w) {
196 213         199 $wsum = 0.5 * $wsum + $spot * $trust_w;
197 213         148 $csum = 0.5 * $csum + $trust_w;
198             }
199 213         1027 return $accepted;
200 4         24 };
201 4         12 return $fn;
202             }
203              
204             1;