File Coverage

blib/lib/Set/Streak.pm
Criterion Covered Total %
statement 81 83 97.5
branch 36 48 75.0
condition 13 15 86.6
subroutine 6 6 100.0
pod 1 1 100.0
total 137 153 89.5


line stmt bran cond sub pod time code
1             package Set::Streak;
2              
3 2     2   453780 use 5.010001;
  2         7  
4 2     2   10 use strict;
  2         12  
  2         72  
5 2     2   8 use warnings;
  2         4  
  2         161  
6              
7 2     2   10 use Exporter 'import';
  2         2  
  2         2393  
8              
9             our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
10             our $DATE = '2023-12-09'; # DATE
11             our $DIST = 'Set-Streak'; # DIST
12             our $VERSION = '0.004'; # VERSION
13              
14             our @EXPORT_OK = qw(gen_longest_streaks_table);
15              
16             our %SPEC;
17              
18             my $re = qr/\A(\d+)\.(.*)\z/;
19              
20             $SPEC{gen_longest_streaks_table} = {
21             v => 1.1,
22             summary => 'Generate ranking table of longest streaks',
23             description => <<'MARKDOWN',
24              
25             This routine can be used to generate a ranking table of longest streaks,
26             represented by sets. You supply a list (arrayref) of `sets`, each set
27             representing a period. The routine will rank the items that appear the longest
28             in consecutive sets (periods).
29              
30             For example, let's generate a table for longest daily CPAN releases for November
31             2023 up until today (assume today is the Nov 5th), the input will be:
32              
33             [
34             [qw/PERLANCAR DART JJATRIA NERDVANA LEEJO CUKEBOT RSCHUPP JOYREX TANIGUCHI OODLER OLIVER JV/], # period 1 (first): list of CPAN authors releasing something on Nov 1, 2023
35             [qw/SKIM PERLANCAR BURAK BDFOY SUKRIA AJNN YANGAK CCELSO SREZIC/], # period 2: list of CPAN authors releasing something on Nov 2, 2023
36             [qw/JGNI DTUCKWELL SREZIC WOUTER LSKATZ SVW RAWLEYFOW DJERIUS PERLANCAR CRORAA EINHVERFR ASPOSE/], # period 3: list of CPAN authors releasing something on Nov 3, 2023
37             [qw/JGNI LEONT LANCEW NKH MDOOTSON SREZIC PERLANCAR DROLSKY JOYREX JRM DAMI PRBRENAN DCHURCH/], # period 4: list of CPAN authors releasing something on Nov 4, 2023
38             [qw/JGNI JRM TEAM LICHTKIND JJATRIA JDEGUEST PERLANCAR SVW DRCLAW PLAIN SUKRIA RSCHUPP/], # period 5 (current): list of CPAN authors releasing something on Nov 5, 2023
39             ]
40              
41             The result of the routine will be like:
42              
43             [
44             { item => "PERLANCAR", len => 5, start => 1, status => "ongoing" },
45             { item => "SREZIC", len => 3, start => 2, status => "might-break" },
46             { item => "JGNI", len => 3, start => 3, status => "ongoing" },
47             { item => "JRM", len => 2, start => 4, status => "ongoing" },
48             { item => "CUKEBOT", len => 1, start => 1, status => "broken" },
49             { item => "DART", len => 1, start => 1, status => "broken" },
50             { item => "JJATRIA", len => 1, start => 1, status => "broken" },
51             ...
52             ]
53              
54             Sorting is done by `len` (descending) first, then by `start` (ascending), then
55             by `item` (ascending).
56              
57             MARKDOWN
58             args => {
59             sets => {
60             schema => 'aoaos*',
61             req => 1,
62             },
63             exclude_broken => {
64             summary => 'Whether to exclude broken streaks',
65             schema => 'bool*',
66             description => <<'MARKDOWN',
67              
68             Streak status is either: `ongoing` (item still appear in current period),
69             `might-break` (does not yet appear in current period, but current period is
70             assumed to have not ended yet, later update might still see item appearing), or
71             `broken` (no longer appear after its beginning period sometime in the past
72             periods).
73              
74             If you set this option to true, streaks that have the status of `broken` are not
75             returned.
76              
77             MARKDOWN
78             },
79              
80             # the advanced options are used for caching streaks data structure and
81             # reusing it later for faster update
82             raw => {
83             summary => 'Instead of streaks table, return the raw streaks hash',
84             schema => 'true*',
85             tags => ['category:advanced'],
86             },
87             streaks => {
88             summary => 'Initialize streaks hash with this',
89             schema => 'hash*',
90             tags => ['category:advanced'],
91             },
92             start_period => {
93             schema => 'posint*',
94             tags => ['category:advanced'],
95             },
96              
97             min_len => {
98             summary => 'Minimum length of streak to return',
99             schema => 'posint*',
100             },
101             },
102             args_rels => {
103             req_all => [qw/streaks start_period/],
104             },
105             result_naked => 1,
106             };
107             sub gen_longest_streaks_table {
108 5     5 1 372536 my %args = @_;
109              
110 5         21 my $sets = $args{sets};
111 5         9 my $prev_period = 0;
112              
113 5         9 my %streaks; # list of all streaks, key=".", value=[length, broken in which period]
114              
115             INIT_STREAKS:
116 5 100       19 if ($args{streaks}) {
117 2         3 %streaks = %{ $args{streaks} };
  2         13  
118 2         9 for my $key (keys %streaks) {
119 20         24 my $streak = $streaks{$key};
120 20 50       117 my ($start, $item) = $key =~ $re or die;
121 20         33 my $p = $start + $streak->[0] - 1;
122 20 100       44 $prev_period = $p if $prev_period < $p;
123             }
124             }
125              
126             INIT_START_PERIOD:
127 5         11 my $period;
128 5 100       15 if ($args{start_period}) {
129 2 50       11 if ($prev_period > 0) {
130             return [412, "Start period must be $prev_period or ".($prev_period+1)]
131 2 50 66     27 unless $args{start_period} == $prev_period || $args{start_period} == $prev_period+1;
132             } else {
133             return [412, "Start period must be 1"]
134 0 0       0 unless $args{start_period} == 1;
135             }
136 2         6 $period = $args{start_period} - 1;
137             } else {
138 3         26 $period = $prev_period;
139             }
140              
141 5         10 INIT_CURRENT_ITEMS:
142             my %current_items; # items with streaks currently going, key=item, val=starting period
143 5         19 for my $key (keys %streaks) {
144 20         21 my $streak = $streaks{$key};
145 20 50       91 my ($start, $item) = $key =~ $re or die;
146 20 100 100     69 $current_items{$item} = $start if !defined($streak->[1]) ||
147             $streak->[1] == $prev_period;
148             }
149              
150             FIND_STREAKS: {
151 5         10 for my $set (@$sets) {
  5         15  
152 15         24 $period++;
153 15         20 my %items_this_period; $items_this_period{$_}++ for @$set;
  15         65  
154 15         21 my %new_items_this_period;
155              
156             # find new streaks: items that just appear in this period
157             FIND_NEW: {
158 15         22 for my $item (@$set) {
  15         26  
159 56 100       115 next if $current_items{$item};
160 28         52 $current_items{$item} = $period;
161 28         101 $streaks{ $period . "." . $item } = [1, undef];
162 28         55 $new_items_this_period{$item} = 1;
163             }
164             } # FIND_NEW
165              
166             # find broken streaks: items that no longer appear in this period
167             FIND_BROKEN: {
168 15         21 for my $item (keys %current_items) {
  15         39  
169 84         146 my $key = $current_items{$item} . "." . $item;
170 84 100       147 if ($items_this_period{$item}) {
171 56 100       96 if ($period > $prev_period) {
172 53 100       153 unless ($new_items_this_period{$item}) {
173             # streak keeps going
174 26         52 $streaks{$key}[0]++;
175             }
176             } else {
177 3         4 $streaks{$key}[1] = undef;
178             }
179             } else {
180             # streak broken (but for current period, it might not
181             # just yet, the item might still appear later before the
182             # end of the current period. you just need to check if
183             # period number recorded is the current period
184 28   66     111 $streaks{$key}[1] //= $period;
185 28         58 delete $current_items{$item};
186             }
187             }
188             } # FIND_BROKEN
189             } # for $set
190             } # FIND_STREAKS
191              
192 5         27 my $cur_period = @$sets;
193             FILTER_STREAKS: {
194 5         9 for my $key (keys %streaks) {
  5         17  
195 48         66 my $streak = $streaks{$key};
196 48 100       83 if ($args{exclude_broken}) {
197 8 100 100     90 if (defined $streak->[1] && !($streak->[1] == $cur_period)) { delete $streaks{$key} }
  3         5  
198             }
199 48 50       101 if (defined $args{min_len}) {
200 0 0       0 delete $streaks{$key} if $streak->[0] < $args{min_len};
201             }
202             }
203             } # FILTER_STREAKS
204              
205             RETURN_RAW:
206 5 100       19 if ($args{raw}) {
207 3         17 return \%streaks;
208             }
209              
210 2         3 my @res;
211             RANK_STREAKS: {
212 2         4 require List::Rank;
  2         729  
213             my @rankres = List::Rank::sortrankby(
214             sub {
215 34     34   462 my $streak_a = $streaks{$a};
216 34         51 my $streak_b = $streaks{$b};
217 34 50       203 my ($start_a, $item_a) = $a =~ $re or die;
218 34 50       166 my ($start_b, $item_b) = $b =~ $re or die;
219 34 100 100     182 ($streak_b->[0] <=> $streak_a->[0]) || # longer streaks first
220             ($start_a <=> $start_b) || # earlier streaks first
221             ($item_a cmp $item_b); # asciibetically
222             },
223 2         1939 keys %streaks
224             );
225              
226 2         64 while (my ($key, $rank) = splice @rankres, 0, 2) {
227 13 50       92 my ($start, $item) = $key =~ $re or die;
228 13         23 my $streak = $streaks{$key};
229 13 100       35 my $status = !defined($streak->[1]) ? "ongoing" :
    100          
230             ($streak->[1] < $cur_period) ? "broken" : "might-break";
231 13         93 push @res, {
232             item => $item,
233             start => $start,
234             len => $streak->[0],
235             status => $status,
236             };
237             }
238             } # RANK_STREAKS
239              
240 2         18 \@res;
241             }
242              
243             1;
244             # ABSTRACT: Routines related to streaks (longest item appearance in consecutive sets)
245              
246             __END__