File Coverage

blib/lib/Sport/Analytics/NHL.pm
Criterion Covered Total %
statement 245 354 69.2
branch 46 110 41.8
condition 31 83 37.3
subroutine 32 35 91.4
pod 14 14 100.0
total 368 596 61.7


line stmt bran cond sub pod time code
1             package Sport::Analytics::NHL;
2              
3 19     19   672067 use v5.10.1;
  19         151  
4 19     19   91 use strict;
  19         35  
  19         400  
5 19     19   76 use warnings FATAL => 'all';
  19         30  
  19         628  
6 19     19   529 use experimental qw(smartmatch);
  19         2780  
  19         164  
7              
8 19     19   1024 use File::Basename;
  19         32  
  19         1475  
9 19     19   7761 use Storable qw(store retrieve dclone);
  19         36317  
  19         1117  
10 19     19   5900 use POSIX qw(strftime);
  19         69854  
  19         126  
11              
12 19     19   23096 use List::MoreUtils qw(uniq);
  19         139173  
  19         138  
13 19     19   22146 use JSON -convert_blessed_universally;
  19         132841  
  19         117  
14              
15 19     19   10761 use Sport::Analytics::NHL::LocalConfig;
  19         40  
  19         2010  
16 19     19   113 use Sport::Analytics::NHL::Config;
  19         31  
  19         2641  
17 19     19   6309 use Sport::Analytics::NHL::Errors;
  19         167  
  19         3326  
18              
19 19   33 19   7408 use if ! $ENV{HOCKEYDB_NODB} && $MONGO_DB, 'Sport::Analytics::NHL::DB';
  19         164  
  19         239  
20 19     19   8877 use Sport::Analytics::NHL::Merger;
  19         51  
  19         1018  
21 19     19   8344 use Sport::Analytics::NHL::Normalizer;
  19         124  
  19         1210  
22 19     19   129 use Sport::Analytics::NHL::Report;
  19         43  
  19         362  
23 19     19   87 use Sport::Analytics::NHL::Scraper;
  19         37  
  19         699  
24 19     19   102 use Sport::Analytics::NHL::Test;
  19         32  
  19         3184  
25 19     19   123 use Sport::Analytics::NHL::Tools;
  19         42  
  19         2599  
26 19     19   125 use Sport::Analytics::NHL::Util;
  19         45  
  19         1006  
27              
28 19     19   154 use parent 'Exporter';
  19         43  
  19         112  
29              
30             =head1 NAME
31              
32             Sport::Analytics::NHL - Crawl data from NHL.com and put it into a database
33              
34             =head1 VERSION
35              
36             Version 1.31
37              
38             =cut
39              
40             our @EXPORT = qw(
41             hdb_version
42             );
43              
44             our $VERSION = "1.31";
45              
46             =head1 SYNOPSIS
47              
48             Crawl data from NHL.com and put it into a database.
49              
50             Crawls the NHL.com website, processes the game reports and stores them into a Mongo database or into the filesystem.
51              
52             use Sport::Analytics::NHL;
53              
54             my $nhl = Sport::Analytics::NHL->new();
55             $nhl->scrape_games();
56             ...
57             # more functionality to be added in later releases.
58              
59             =head1 EXPORT
60              
61             hdb_version() - report the version. The rest of the interface is object-oriented, via the new() constructor.
62              
63             =cut
64              
65             sub hdb_version () {
66              
67 1     1 1 883 $VERSION;
68             }
69              
70             =head1 METHODS
71              
72             =over 2
73              
74             =item C<hdb_version()>
75              
76             Returns the current version of the package
77              
78             =item C<new()>
79              
80             Returns a new Sport::Analytics::NHL object. If a Mongo DB is configured, the connection to the database is established, and the handle is stored in the object.
81              
82             =item C<parse_game_args()>
83              
84             Parses various game arguments to the scrape_games() method:
85             * NHL IDs of format SSSS0TIIII (2016020201)
86             * Our IDs of format SSSSTIIII (201620201)
87             * Dates in format YYYYMMDD (20160202)
88              
89             where S stands for starting year of season, T - stage (2 - regular, 3 - playoffs), I - the ID of the game within the year.
90              
91             Modifies the games array reference passed as the first argument, and dates array reference passed as the second argument, using the list of number strings as the remaining list of arguments.
92              
93             =item C<get_crawled_games_for_dates()>
94              
95             Gets a list of already crawled games on given list of dates. Crawls the season schedule on the NHL website if necessary.
96             Arguments: the options to pass to the scraper that crawls and the list of the dates.
97             Returns: the list of game structures which are hash references with the following fields:
98             * season
99             * stage
100             * season id
101             * Our game ID (see the previous section)
102              
103             =item C<get_nodb_scheduled_games()>
104              
105             Gets a list of scheduled, uncrawled games in the filesystem, based on the schedules already stored in, or crawled into the system.
106             Argument: options hashref that specifies whether new schedules should be crawled, and whether only games of a specific stage should be returned.
107             Returns: the list of game structures which are hash references with the following fields:
108             * season
109             * stage
110             * season id
111             * Our game ID (see the previous section)
112              
113             =item C<get_db_scheduled_games()>
114              
115             Same as the previous method, but the information is extracted from the Mongo database rather than the filesystem.
116              
117             =item C<get_scheduled_games()>
118              
119             The generic wrapper for the two previous methods.
120              
121             =item C<scrape_games()>
122              
123             Scrape the games reports from the NHL website and store them in files on the disk.
124             Arguments: the hashref of options for the scrape -
125             * no_schedule_crawl - if set, do not crawl a fresh schedule; use the schedules that are already stored
126             * start_season - the first season to start scraping from (default 1917)
127             * stop_season - the last season to scrape (default - ongoing)
128             * stage - 2 for Regular, 3 for Playoffs, none for both (default - none)
129             * force - overwrite the already present files and data
130              
131             =item C<compile_file()>
132              
133             Compiles a single JSON or HTML report into a parsed hashref and stores it in a Storable file
134             Arguments:
135             * The options hashref -
136             - force: Force overwrite of already existing file
137             - test: Test the resulted parsed report
138             * The file
139             * Our SSSSTNNNN game id
140             * Optional: preset type of the report
141              
142             Returns: the path to the compiled file
143              
144             =item C<compile()>
145              
146             Compiles reports retrieved into the filesystem into parsed hashrefs and stores them in a Storable file.
147             Arguments:
148             * The options hashref -
149             - force: Force overwrite of already existing file
150             - test: Test the resulted parsed report
151             - doc: limit compilation to these Report types
152             - data_dir: the root directory of the reports
153             * The list of game ids
154              
155             Returns: the location of the compiled storables
156              
157             =item C<retrieve_compiled_report()>
158              
159             Retrieves the compiled storable file for the given game ID and file type.
160             Compiles the file anew unless explicitly prohibited from doing so.
161              
162             Arguments:
163             * The options hashref -
164             - no_compile: don't compile files if required
165             - recompile: force recompilation
166             * game ID
167             * doc type (e.g. BS, PL, RO, ...)
168             * path to the storable file.
169             The file is expected at location $path/$doc.storable
170              
171             Returns: the file structure retrieved from storable, or undef.
172              
173             =item C<merge()>
174              
175             Merges reports compiled in the filesystem into one boxscore hashref and stores it in a Storable file.
176              
177             Arguments:
178             * The options hashref -
179             - force: Force overwrite of already existing file
180             - test: Test the resulted parsed report
181             - doc: limit compilation to these Report types
182             - data_dir: the root directory of the reports
183             - no_compile: don't compile files if required
184             - recompile: force recompilation
185             * The list of game ids
186              
187             Returns: the location of the merged storable
188              
189             =item C<check_consistency()>
190              
191             Checks the consistency between the summarized events and the summary data in the boxscore itself. If there are inconsistencies, the game files are recompiled and remerged and some fixes are attempted. If there are unfixable inconsistencies, the check dies.
192              
193             Arguments:
194             * The merged file (to manage the game files)
195             * The boxscore to summarize
196             * The produced summary of events
197              
198             Returns: void. Dies if something goes wrong.
199              
200             =item C<normalize()>
201              
202             Normalizes the merged boxscore, providing default values and erasing unnecessary data from the boxscore data structure. Saves the normalized boxscore both as a Perl storable and as a JSON. This is the highest level of integration that this package provides without a database (Mongo) interface.
203              
204              
205             Arguments:
206             * The options hashref -
207             - force: Force overwrite of already existing file
208             - test: Test the resulted parsed report
209             - doc: limit compilation to these Report types
210             - data_dir: the root directory of the reports
211             - no_compile: don't compile files if required
212             - recompile: force recompilation
213             - no_merge: don't merge files if required
214             - remerge: force remerging
215             * The list of game ids
216              
217             Returns: the location of the normalized storable(s). The JSON would be in the same directory.
218              
219             =back
220              
221             =cut
222              
223             sub new ($$) {
224              
225 8     8 1 86707 my $class = shift;
226 8         74 my $opts = shift;
227              
228 8         88 my $self = {};
229 8 50 100     329 unless ($opts->{no_database} || $ENV{HOCKEYDB_NODB} || ! $MONGO_DB) {
      66        
230 0   0     0 $self->{db} = Sport::Analytics::NHL::DB->new($opts->{database} || $ENV{HOCKEYDB_DBNAME} || $MONGO_DB);
231             }
232 8 100       111 $ENV{HOCKEYDB_DATA_DIR} = $DATA_DIR = $opts->{data_dir} if $opts->{data_dir};
233 8         67 bless $self, $class;
234 8         94 $self;
235             }
236              
237             sub parse_game_args ($$@) {
238              
239 2     2 1 889 my $games = shift;
240 2         23 my $dates = shift;
241 2         16 my @args = @_;
242              
243 2         18 for (@args) {
244 9         17 my $game = {};
245 9         31 when (/^\d{10}$/) { $game = parse_nhl_game_id($_); push(@{$games}, $game) }
  2         4  
  2         3  
  2         5  
246 7         41 when (/^\d{9}$/ ) { $game = parse_our_game_id($_); push(@{$games}, $game) }
  5         47  
  5         16  
  5         29  
247 2         5 when (/^\d{8}$/ ) { push(@{$dates}, $_) }
  2         3  
  2         6  
248 0         0 default { warn "[WARNING] Unrecognized argument $_, skipping\n" }
  0         0  
249             }
250             }
251              
252             sub get_crawled_games_for_dates ($$@) {
253              
254 1     1 1 6 my $self = shift;
255 1         2 my $opts = shift;
256 1         3 my @dates = @_;
257              
258 1         4 my $schedules = {};
259 1         2 my $schedule_by_date = {};
260 1         2 my @games = ();
261 1         3 for my $date (@dates) {
262             $opts->{start_season} = $opts->{stop_season} =
263 4         22 get_season_from_date($date);
264 4 100       22 unless ($schedules->{$opts->{start_season}}) {
265 3         18 $schedules = crawl_schedule($opts);
266             arrange_schedule_by_date(
267             $schedule_by_date,
268             $schedules->{$opts->{start_season}}
269 3         29 );
270 0         0 $self->{db}->insert_schedule(values %{$schedule_by_date})
271 3 50       33 if $self->{db};
272             }
273 4 50       18 unless ($schedule_by_date->{$date}) {
274 0         0 print STDERR "No games scheduled for $date, skipping...\n";
275 0         0 next;
276             }
277 4         8 push(@games, @{$schedule_by_date->{$date}});
  4         24  
278             }
279 1         3319 @games;
280             }
281              
282             sub get_nodb_scheduled_games ($) {
283              
284 1     1 1 2157 my $opts = shift;
285              
286 1         4 my @games = ();
287             my $schedules = $opts->{no_schedule_crawl} ?
288 1 50       13 read_schedules($opts) : crawl_schedule($opts);
289 1         3 for my $season (keys %{$schedules}) {
  1         6  
290 2         19 debug "NODB schedule SEASON $season";
291             my $existing_game_ids =
292 2 50       16 $opts->{force} ? {} : read_existing_game_ids($season);
293             my $season_schedule = ref $schedules->{$season} eq 'ARRAY'
294             ? $schedules->{$season}
295 2 50       14 : [map(@{$_->{games}}, @{$schedules->{$season}{dates}})];
  0         0  
  0         0  
296 2         5 for my $schedule_game ( @{$season_schedule} ) {
  2         6  
297 2706         5683 my $game = convert_schedule_game($schedule_game);
298 2706 100       5518 next unless $game;
299             next unless ($opts->{stage} && $game->{stage} == $opts->{stage})
300             || (!$opts->{stage}
301 2672 50 33     12463 && ($game->{stage} == $REGULAR || $game->{stage} == $PLAYOFF)
      66        
      33        
      33        
302             );
303 2672 50       5782 next if $existing_game_ids->{$game->{game_id}};
304 2672         6356 push(@games, $game);
305             }
306             }
307 1         1392 @games;
308             }
309              
310             sub get_db_scheduled_games ($$) {
311              
312 0     0 1 0 my $self = shift;
313 0         0 my $opts = shift;
314              
315 0         0 my @games = ();
316             my $existing_game_ids = $opts->{force}
317             ? []
318 0 0       0 : $self->{db}->get_existing_game_ids($opts);
319              
320 0 0       0 if ( !$opts->{no_schedule_crawl} ) {
321 0         0 my $schedules = crawl_schedule($opts);
322 0         0 for my $season ( sort keys %{$schedules} ) {
  0         0  
323 0         0 my $schedule_by_date = {};
324             arrange_schedule_by_date(
325             $schedule_by_date,
326 0         0 $schedules->{$season}
327             );
328 0         0 $self->{db}->insert_schedule(values %{$schedule_by_date});
  0         0  
329             }
330             }
331              
332 0         0 debug scalar(@{$existing_game_ids}) . " total existing games";
  0         0  
333             @games = $self->{db}{dbh}->get_collection('schedule')->find(
334             {
335             game_id => { '$nin' => $existing_game_ids },
336             $opts->{stage} ? ( stage => $opts->{stage}+0 ) : (),
337             season => {
338             '$gte' => $opts->{start_season}+0,
339 0 0       0 '$lte' => $opts->{stop_season} +0,
340             },
341             }
342             )->all();
343 0         0 @games;
344             }
345              
346             sub get_scheduled_games ($$) {
347              
348 0     0 1 0 my $self = shift;
349 0         0 my $opts = shift;
350              
351 0   0     0 $opts->{start_season} ||= $CURRENT_SEASON;
352 0   0     0 $opts->{stop_season} ||= $CURRENT_SEASON;
353              
354             $self->{db}
355 0 0       0 ? $self->get_db_scheduled_games($opts)
356             : get_nodb_scheduled_games($opts);
357             }
358              
359             sub scrape_games ($$;@) {
360              
361 1     1 1 25 my $self = shift;
362 1         16 my $opts = shift;
363 1         12 my @args = @_;
364              
365 1         13 my @games = ();
366 1 50       29 if (@args) {
367 1         4 my @dates = ();
368 1         27 parse_game_args(\@games, \@dates, @args);
369             push(
370             @games,
371             $opts->{no_schedule_crawl}
372 1 0       20 ? get_games_for_dates(@dates)
    50          
373             : $self->get_crawled_games_for_dates( $opts, @dates ),
374             ) if @dates;
375             }
376             else {
377 0         0 @games = $self->get_scheduled_games($opts);
378             }
379 1 50       19 unless (@games) {
380 0         0 print STDERR "No games to crawl found!\n";
381 0         0 return ();
382             }
383 1         13 my @got_games;
384 1   33     27 @games = sort { ($a->{ts} || $a->{game_id}) <=> ($b->{ts} || $b->{game_id}) } @games;
  3   33     76  
385 1         5 for my $game (@games) {
386 3 50 33     39 if ($game->{date} && $game->{date} > strftime("%Y%m%d", localtime)) {
387 0         0 print "Game $game->{_id} is in the future ($game->{date}), wrapping up\n";
388 0         0 last;
389             }
390 3         38 my $crawled_game = crawl_game($game);
391 3         13 push(@got_games, map($_->{file}, values %{$crawled_game->{content}}));
  3         384  
392             }
393 1         14 @got_games;
394             }
395              
396             sub compile_file ($$$$) {
397              
398 13     13 1 33 my $opts = shift;
399 13         30 my $file = shift;
400 13         26 my $game_id = shift;
401 13   50     84 my $type = shift || 'XX';
402              
403 13         48 my $args = { file => $file };
404 13 0 33     103 if (
      33        
405             $BROKEN_FILES{$game_id}->{$type} &&
406             $BROKEN_FILES{$game_id}->{$type} != $UNSYNCHED &&
407             $BROKEN_FILES{$game_id}->{$type} != $NO_EVENTS
408             ) {
409 0         0 print STDERR "File $file is broken, skipping\n";
410 0         0 return undef;
411             }
412 13         28 my $storable = $file;
413 13         93 $storable =~ s/\.([a-z]+)$/.storable/i;
414 13 100 66     331 if (!$opts->{force} && -f $storable && -M $storable < -M $file) {
      66        
415 5         121 print STDERR "File $storable already exists, skipping\n";
416 5         31 return $storable;
417             }
418 8         139 my $report = Sport::Analytics::NHL::Report->new($args);
419 8         82 $report->process();
420 8 50       43 if ($opts->{test}) {
421 0         0 test_boxscore($report, { lc $args->{type} => 1 });
422 0         0 verbose "Ran $TEST_COUNTER->{Curr_Test} tests";
423 0         0 $TEST_COUNTER->{Curr_Test} = 0;
424             }
425 8         63 store $report, $storable;
426 8         25698 debug "Wrote $storable";
427              
428 8         2034 $storable;
429              
430             }
431              
432             sub compile ($$@) {
433              
434 1     1 1 17 my $self = shift;
435 1         11 my $opts = shift;
436 1         11 my @game_ids = @_;
437              
438 1         10 my @storables = ();
439 1         14 for my $game_id (@game_ids) {
440 1         11 $ENV{GS_KEEP_PENL} = 0;
441 1 50       20 if (defined $DEFAULTED_GAMES{$game_id}) {
442 0         0 print STDERR "Skipping defaulted game $game_id\n";
443 0         0 next;
444             }
445 1         25 my @game_files = get_game_files_by_id($game_id, $opts->{data_dir});
446 1 0 33     23 if (
      33        
447             $BROKEN_FILES{$game_id}->{BS} &&
448             $BROKEN_FILES{$game_id}->{BS} == $NO_EVENTS &&
449 0         0 !grep { /PL/ } @game_files
450             ) {
451 0         0 $ENV{GS_KEEP_PENL} = 1;
452             }
453 1         9 for my $game_file (@game_files) {
454 6         40 $game_file =~ m|/([A-Z]{2}).[a-z]{4}$|;
455 6         16 my $type = $1;
456 6 50 33     21 next if ($opts->{doc} && !grep {$_ eq $type} @{$opts->{doc}});
  0         0  
  0         0  
457 6         21 my $storable = compile_file($opts, $game_file, $game_id, $type);
458 6 50       46 push(@storables, $storable) if $storable;
459             }
460             }
461 1         20 return @storables;
462             }
463              
464             sub retrieve_compiled_report ($$$$) {
465              
466 29     29 1 361571 my $opts = shift;
467 29         70 my $game_id = shift;
468 29         67 my $doc = shift;
469 29         67 my $path = shift;
470              
471 29         891 @Sport::Analytics::NHL::Report::RO::ISA = qw(Sport::Analytics::NHL::Report);
472 29         667 @Sport::Analytics::NHL::Report::PL::ISA = qw(Sport::Analytics::NHL::Report);
473 29         624 @Sport::Analytics::NHL::Report::GS::ISA = qw(Sport::Analytics::NHL::Report);
474 29         652 @Sport::Analytics::NHL::Report::ES::ISA = qw(Sport::Analytics::NHL::Report);
475 29         183 my $doc_storable = "$path/$doc.storable";
476 29 100       155 my $doc_source = "$path/$doc." . ($doc eq 'BS' ? 'json' : 'html');
477              
478 29         197 debug "Looking for file $doc_storable or $doc_source";
479 29 100 66     1066 return retrieve $doc_storable if -f $doc_storable && ! $opts->{recompile};
480 13 100       62 if ($opts->{no_compile}) {
481 1         33 print STDERR "$doc: No storable file and no-compile option specified, skipping\n";
482 1         732 return undef;
483             }
484 12 100       210 if (! -f $doc_source) {
485 5         119 print STDERR "$doc: No storable and no source report available, skipping\n";
486 5         930 return undef;
487             }
488 7         51 debug "Compiling $doc_source";
489 7         44 $doc_storable = compile_file($opts, $doc_source, $game_id, $doc);
490 7 50       81 retrieve $doc_storable if $doc_storable;
491             }
492              
493             sub merge ($$@) {
494              
495 12     12 1 297481 my $self = shift;
496 12         37 my $opts = shift;
497 12         63 my @game_ids = @_;
498              
499 12         51 my @storables = ();
500              
501 12         65 for my $game_id (@game_ids) {
502 12 50       85 if (defined $DEFAULTED_GAMES{$game_id}) {
503 0         0 print STDERR "Skipping defaulted game $game_id\n";
504 0         0 next;
505             }
506 12         138 my $path = get_game_path_from_id($game_id, $opts->{data_dir});
507 12         77 my $merged = "$path/$MERGED_FILE";
508 12 100 66     333 if (! $opts->{force} && -f $merged) {
509 10         484 print STDERR "Merged file $merged already exists, skipping\n";
510 10         73 push(@storables, $merged);
511 10         57 next;
512             }
513 2   50     18 $opts->{doc} ||= [];
514 2         8 $opts->{doc} = [qw(PL RO GS ES)];
515 2         12 my $boxscore = retrieve_compiled_report($opts, $game_id, 'BS', $path);
516 2         3873 $boxscore->{sources} = {BS => 1};
517 2 50       8 next unless $boxscore;
518 2         13 $boxscore->build_resolve_cache();
519 2         33 $boxscore->set_event_extra_data();
520 2         6 for my $doc (@{$opts->{doc}}) {
  2         7  
521 8         24 my $report = retrieve_compiled_report($opts, $game_id, $doc, $path);
522 8 100       5972 merge_report($boxscore, $report) if $report;
523             }
524 2 50       11 if ($opts->{test}) {
525 0         0 test_merged_boxscore($boxscore);
526 0         0 verbose "Ran $TEST_COUNTER->{Curr_Test} tests";
527 0         0 $TEST_COUNTER->{Curr_Test} = 0;
528             }
529 2         13 debug "Storing $merged";
530 2         14 store($boxscore, $merged);
531 2         7881 push(@storables, $merged)
532             }
533 12         74 return @storables;
534             }
535              
536             sub check_consistency ($$$;$) {
537              
538 0     0 1 0 my $merged_file = shift;
539 0         0 my $boxscore = shift;
540 0         0 my $event_summary = shift;
541              
542 0         0 my $to_die = 0;
543 0         0 my $loop = 1;
544              
545 0         0 my $frozen_event_summary = $event_summary;
546 0         0 while ($loop) {
547 0         0 $event_summary = dclone $frozen_event_summary;
548 0         0 eval {
549             test_consistency($boxscore, $event_summary)
550             unless $BROKEN_FILES{$boxscore->{_id}}->{BS}
551 0 0 0     0 && keys(%{$boxscore->{sources}}) <= 1;
  0         0  
552             };
553 0 0       0 if ($@) {
554 0         0 my $error = $@;
555 0         0 my $path = dirname($merged_file);
556 0         0 unlink for glob("$path/*.storable");
557 0 0       0 die $error if $to_die == 1;
558 0         0 print STDERR "Trying to fix error: $error";
559 0 0       0 if ($error =~ /team.*(0|1).*playergo.*consistent: (\d+) vs (\d+)/i) {
560 0         0 verbose "Fixing team playergoals";
561 0         0 my $t = $1;
562 0         0 fix_playergoals($boxscore, $t, $event_summary);
563 0         0 store $boxscore, $merged_file;
564 0         0 $to_die = 1;
565 0         0 next;
566             }
567             else {
568 0         0 $error =~ /(\d{7})/; my $player = $1;
  0         0  
569 0 0       0 die $error if $to_die == $player;
570 0 0 0     0 if ($boxscore->{season} < 1945 && $error =~ /assists/) {
    0          
    0          
571 0         0 $error =~ / (\d{7}).* (\d) vs (\d)/;
572 0 0       0 if ($2 == $3 + 1) {
573 0         0 set_player_stat($boxscore, $1, 'assists', $3);
574 0         0 store $boxscore, $merged_file;
575             }
576             }
577             elsif ($error =~ /goalsAgainst/) {
578 0         0 $error =~ / (\d{7}).* (\d+) vs (\d+)/;
579 0         0 set_player_stat($boxscore, $1, 'goalsAgainst', $3);
580 0         0 store $boxscore, $merged_file;
581             }
582             elsif ($error =~ /penaltyMinutes/ ) {
583 0         0 $error =~ / (\d{7}).* (\d+) vs (\d+)/;
584             my $result = set_player_stat(
585             $boxscore, $1, 'penaltyMinutes', $3,
586             $event_summary->{$1}{_servedbyMinutes},
587 0   0     0 ) || 0;
588 0 0       0 store $boxscore, $merged_file unless $result;
589             }
590 0         0 $to_die = $player;
591             }
592             }
593             else {
594 0         0 $loop = 0;
595             }
596             }
597             }
598              
599             sub normalize ($$@) {
600              
601 2     2 1 1302 my $self = shift;
602 2         15 my $opts = shift;
603 2         15 my @game_ids = @_;
604              
605 2         18 my @storables = ();
606              
607 2         25 for my $game_id (@game_ids) {
608 2 50       36 if (defined $DEFAULTED_GAMES{$game_id}) {
609 0         0 print STDERR "Skipping defaulted game $game_id\n";
610 0         0 next;
611             }
612 2         12 my $repeat = -1;
613 2         15 REPEAT:
614             $repeat++;
615 2         35 my $path = get_game_path_from_id($game_id);
616 2         35 my $normalized = "$path/$NORMALIZED_FILE";
617 2 50 33     93 if (! $opts->{force} && -f $normalized) {
618 0         0 print STDERR "Normalized file $normalized already exists, skipping\n";
619 0         0 push(@storables, $normalized);
620 0         0 next;
621             }
622 2         36 my @merged = $self->merge($opts, $game_id);
623 2         39 my $boxscore = retrieve $merged[0];
624 2   50     11286 $boxscore->{sources}{BS} ||= 1;
625 2 50       16 if (! $boxscore) {
626 0         0 print STDERR "Couldn't retrieve the merged file, skipping";
627 0         0 next;
628             }
629 2         63 my $event_summary = summarize($boxscore);
630 2 50       12 if ($opts->{test}) {
631 0         0 check_consistency($merged[0], $boxscore, $event_summary);
632 0         0 verbose "Ran $TEST_COUNTER->{Curr_Test} tests";
633 0         0 $TEST_COUNTER->{Curr_Test} = 0;
634             }
635 2         4 eval {
636 2         9 normalize_boxscore($boxscore, 1);
637             };
638 2 50       8 if ($@) {
639 0         0 unlink for glob("$path/*.storable");
640 0         0 die $@;
641             }
642 2 50       7 if ($opts->{test}) {
643 0         0 eval {
644 0         0 test_normalized_boxscore($boxscore);
645             };
646 0 0       0 if ($@) {
647 0         0 unlink $merged[0];
648 0 0       0 goto REPEAT if ! $repeat;
649             }
650 0         0 verbose "Ran $TEST_COUNTER->{Curr_Test} tests";
651 0         0 $TEST_COUNTER->{Curr_Test} = 0;
652             }
653 2         23 debug "Storing $normalized";
654 2         143 my $json = JSON->new()->pretty(1)->allow_nonref->convert_blessed;
655 2         136 write_file($json->encode($boxscore), "$path/$NORMALIZED_JSON");
656 2         70 store($boxscore, $normalized);
657 2         5838 push(@storables, $normalized);
658             }
659 2         27 return @storables;
660             }
661              
662             =head1 AUTHOR
663              
664             More Hockey Stats, C<< >>
665              
666             =head1 BUGS
667              
668             Please report any bugs or feature requests to C, or through
669             the web interface at L. I will be notified, and then you'll
670             automatically be notified of progress on your bug as I make changes.
671              
672              
673              
674              
675             =head1 SUPPORT
676              
677             You can find documentation for this module with the perldoc command.
678              
679             perldoc Sport::Analytics::NHL
680              
681             You can also look for information at:
682              
683             =over 4
684              
685             =item * RT: CPAN's request tracker (report bugs here)
686              
687             L
688              
689             =item * AnnoCPAN: Annotated CPAN documentation
690              
691             L
692              
693             =item * CPAN Ratings
694              
695             L
696              
697             =item * Search CPAN
698              
699             L
700              
701             =back
702              
703              
704             =head1 ACKNOWLEDGEMENTS
705              
706              
707             =head1 LICENSE AND COPYRIGHT
708              
709             Copyright 2018 More Hockey Stats.
710              
711             This program is released under the following license: gnu
712              
713              
714             =cut
715              
716             1; # End of Sport::Analytics::NHL