File Coverage

blib/lib/Sport/Analytics/NHL.pm
Criterion Covered Total %
statement 245 354 69.2
branch 46 110 41.8
condition 31 83 37.3
subroutine 32 35 91.4
pod 14 14 100.0
total 368 596 61.7


line stmt bran cond sub pod time code
1             package Sport::Analytics::NHL;
2              
3 19     19   636962 use v5.10.1;
  19         138  
4 19     19   91 use strict;
  19         31  
  19         388  
5 19     19   113 use warnings FATAL => 'all';
  19         30  
  19         580  
6 19     19   539 use experimental qw(smartmatch);
  19         2953  
  19         115  
7              
8 19     19   967 use File::Basename;
  19         35  
  19         1331  
9 19     19   7009 use Storable qw(store retrieve dclone);
  19         34843  
  19         1059  
10 19     19   5477 use POSIX qw(strftime);
  19         66428  
  19         101  
11              
12 19     19   20869 use List::MoreUtils qw(uniq);
  19         130994  
  19         111  
13 19     19   20531 use JSON -convert_blessed_universally;
  19         125454  
  19         121  
14              
15 19     19   10009 use Sport::Analytics::NHL::LocalConfig;
  19         39  
  19         1996  
16 19     19   111 use Sport::Analytics::NHL::Config;
  19         34  
  19         2605  
17 19     19   6141 use Sport::Analytics::NHL::Errors;
  19         155  
  19         3215  
18              
19 19   33 19   7277 use if ! $ENV{HOCKEYDB_NODB} && $MONGO_DB, 'Sport::Analytics::NHL::DB';
  19         162  
  19         188  
20 19     19   7741 use Sport::Analytics::NHL::Merger;
  19         49  
  19         979  
21 19     19   7951 use Sport::Analytics::NHL::Normalizer;
  19         74  
  19         1095  
22 19     19   122 use Sport::Analytics::NHL::Report;
  19         35  
  19         347  
23 19     19   81 use Sport::Analytics::NHL::Scraper;
  19         32  
  19         650  
24 19     19   110 use Sport::Analytics::NHL::Test;
  19         34  
  19         3026  
25 19     19   119 use Sport::Analytics::NHL::Tools;
  19         39  
  19         2365  
26 19     19   111 use Sport::Analytics::NHL::Util;
  19         32  
  19         960  
27              
28 19     19   101 use parent 'Exporter';
  19         41  
  19         94  
29              
30             =head1 NAME
31              
32             Sport::Analytics::NHL - Crawl data from NHL.com and put it into a database
33              
34             =head1 VERSION
35              
36             Version 1.30
37              
38             =cut
39              
40             our @EXPORT = qw(
41             hdb_version
42             );
43              
44             our $VERSION = "1.30";
45              
46             =head1 SYNOPSIS
47              
48             Crawl data from NHL.com and put it into a database.
49              
50             Crawls the NHL.com website, processes the game reports and stores them into a Mongo database or into the filesystem.
51              
52             use Sport::Analytics::NHL;
53              
54             my $nhl = Sport::Analytics::NHL->new();
55             $nhl->scrape_games();
56             ...
57             # more functionality to be added in later releases.
58              
59             =head1 EXPORT
60              
61             hdb_version() - reports the version. The rest of the interface is object-oriented and is accessed via the new() constructor.
62              
63             =cut
64              
65             sub hdb_version () {
66              
67 1     1 1 857 $VERSION;
68             }
69              
70             =head1 METHODS
71              
72             =over 2
73              
74             =item C<hdb_version()>
75              
76             Returns the current version of the package
77              
78             =item C<new()>
79              
80             Returns a new Sport::Analytics::NHL object. If a Mongo DB is configured, the connection to the database is established, and the handle is stored in the object.
81              
82             =item C<parse_game_args()>
83              
84             Parses various game arguments to the scrape_games() method:
85             * NHL IDs of format SSSS0TIIII (2016020201)
86             * Our IDs of format SSSSTIIII (201620201)
87             * Dates in format YYYYMMDD (20160202)
88              
89             where S stands for starting year of season, T - stage (2 - regular, 3 - playoffs), I - the ID of the game within the year.
90              
91             Modifies the games array reference passed as the first argument, and dates array reference passed as the second argument, using the list of number strings as the remaining list of arguments.
92              
93             =item C<get_crawled_games_for_dates()>
94              
95             Gets a list of already crawled games on given list of dates. Crawls the season schedule on the NHL website if necessary.
96             Arguments: the options to pass to the scraper that crawls and the list of the dates.
97             Returns: the list of game structures which are hash references with the following fields:
98             * season
99             * stage
100             * season id
101             * Our game ID (see the previous section)
102              
103             =item C<get_nodb_scheduled_games()>
104              
105             Gets a list of scheduled, uncrawled games in the filesystem, based on the schedules already stored in, or crawled into the system.
106             Argument: options hashref that specifies whether new schedules should be crawled, and whether only games of a specific stage should be returned.
107             Returns: the list of game structures which are hash references with the following fields:
108             * season
109             * stage
110             * season id
111             * Our game ID (see the previous section)
112              
113             =item C<get_db_scheduled_games()>
114              
115             Same as the previous method, but the information is extracted from the Mongo database rather than the filesystem.
116              
117             =item C<get_scheduled_games()>
118              
119             The generic wrapper for the two previous methods.
120              
121             =item C<scrape_games()>
122              
123             Scrape the games reports from the NHL website and store them in files on the disk.
124             Arguments: the hashref of options for the scrape -
125             * no_schedule_crawl - if set, skip crawling a fresh schedule
126             * start_season - the first season to start scraping from (default 1917)
127             * stop_season - the last season to scrape (default - ongoing)
128             * stage - 2 for Regular, 3 for Playoffs, none for both (default - none)
129             * force - override the already present files and data
130              
131             =item C<compile_file()>
132              
133             Compiles a single JSON or HTML report into a parsed hashref and stores it in a Storable file
134             Arguments:
135             * The options hashref -
136             - force: Force overwrite of already existing file
137             - test: Test the resulted parsed report
138             * The file
139             * Our game ID of format SSSSTIIII
140             * Optional: preset type of the report
141              
142             Returns: the path to the compiled file
143              
144             =item C<compile()>
145              
146             Compiles reports retrieved into the filesystem into parsed hashrefs and stores them in a Storable file.
147             Arguments:
148             * The options hashref -
149             - force: Force overwrite of already existing file
150             - test: Test the resulted parsed report
151             - doc: limit compilation to these Report types
152             - data_dir: the root directory of the reports
153             * The list of game ids
154              
155             Returns: the location of the compiled storables
156              
157             =item C<retrieve_compiled_report()>
158              
159             Retrieves the compiled storable file for the given game ID and file type.
160             Compiles the file anew unless explicitly prohibited from doing so.
161              
162             Arguments:
163             * The options hashref -
164             - no_compile: don't compile files if required
165             - recompile: force recompilation
166             * game ID
167             * doc type (e.g. BS, PL, RO, ...)
168             * path to the storable file.
169             The file is expected at location $path/$doc.storable
170              
171             Returns: the file structure retrieved from storable, or undef.
172              
173             =item C<merge()>
174              
175             Merges reports compiled in the filesystem into one boxscore hashref and stores it in a Storable file.
176              
177             Arguments:
178             * The options hashref -
179             - force: Force overwrite of already existing file
180             - test: Test the resulted parsed report
181             - doc: limit compilation to these Report types
182             - data_dir: the root directory of the reports
183             - no_compile: don't compile files if required
184             - recompile: force recompilation
185             * The list of game ids
186              
187             Returns: the location of the merged storable
188              
189             =item C<check_consistency()>
190              
191             Checks the consistency between the summarized events and the summary data in the boxscore itself. If there are inconsistencies, the game files are recompiled and remerged and some fixes are attempted. If there are unfixable inconsistencies, the check dies.
192              
193             Arguments:
194             * The merged file (to manage the game files)
195             * The boxscore to summarize
196             * The produced summary of events
197              
198             Returns: void. Dies if something goes wrong.
199              
200             =item C<normalize()>
201              
202             Normalizes the merged boxscore, providing default values and erasing unnecessary data from the boxscore data structure. Saves the normalized boxscore both as a Perl storable and as a JSON. This is the highest level of integration that this package provides without a database (Mongo) interface.
203              
204              
205             Arguments:
206             * The options hashref -
207             - force: Force overwrite of already existing file
208             - test: Test the resulted parsed report
209             - doc: limit compilation to these Report types
210             - data_dir: the root directory of the reports
211             - no_compile: don't compile files if required
212             - recompile: force recompilation
213             - no_merge: don't merge files if required
214             - remerge: force remerging
215             * The list of game ids
216              
217             Returns: the location of the normalized storable(s). The JSON would be in the same directory.
218              
219             =back
220              
221             =cut
222              
223             sub new ($$) {
224              
225 8     8 1 65842 my $class = shift;
226 8         62 my $opts = shift;
227              
228 8         77 my $self = {};
229 8 50 100     254 unless ($opts->{no_database} || $ENV{HOCKEYDB_NODB} || ! $MONGO_DB) {
      66        
230 0   0     0 $self->{db} = Sport::Analytics::NHL::DB->new($opts->{database} || $ENV{HOCKEYDB_DBNAME} || $MONGO_DB);
231             }
232 8 100       83 $ENV{HOCKEYDB_DATA_DIR} = $DATA_DIR = $opts->{data_dir} if $opts->{data_dir};
233 8         52 bless $self, $class;
234 8         74 $self;
235             }
236              
237             sub parse_game_args ($$@) {
238              
239 2     2 1 768 my $games = shift;
240 2         12 my $dates = shift;
241 2         9 my @args = @_;
242              
243 2         26 for (@args) {
244 9         31 my $game = {};
245 9         22 when (/^\d{10}$/) { $game = parse_nhl_game_id($_); push(@{$games}, $game) }
  2         5  
  2         3  
  2         4  
246 7         50 when (/^\d{9}$/ ) { $game = parse_our_game_id($_); push(@{$games}, $game) }
  5         27  
  5         13  
  5         19  
247 2         4 when (/^\d{8}$/ ) { push(@{$dates}, $_) }
  2         3  
  2         6  
248 0         0 default { warn "[WARNING] Unrecognized argument $_, skipping\n" }
  0         0  
249             }
250             }
251              
252             sub get_crawled_games_for_dates ($$@) {
253              
254 1     1 1 5 my $self = shift;
255 1         2 my $opts = shift;
256 1         2 my @dates = @_;
257              
258 1         2 my $schedules = {};
259 1         3 my $schedule_by_date = {};
260 1         2 my @games = ();
261 1         3 for my $date (@dates) {
262             $opts->{start_season} = $opts->{stop_season} =
263 4         18 get_season_from_date($date);
264 4 100       19 unless ($schedules->{$opts->{start_season}}) {
265 3         18 $schedules = crawl_schedule($opts);
266             arrange_schedule_by_date(
267             $schedule_by_date,
268             $schedules->{$opts->{start_season}}
269 3         23 );
270 0         0 $self->{db}->insert_schedule(values %{$schedule_by_date})
271 3 50       29 if $self->{db};
272             }
273 4 50       15 unless ($schedule_by_date->{$date}) {
274 0         0 print STDERR "No games scheduled for $date, skipping...\n";
275 0         0 next;
276             }
277 4         7 push(@games, @{$schedule_by_date->{$date}});
  4         18  
278             }
279 1         2905 @games;
280             }
281              
282             sub get_nodb_scheduled_games ($) {
283              
284 1     1 1 1853 my $opts = shift;
285              
286 1         2 my @games = ();
287             my $schedules = $opts->{no_schedule_crawl} ?
288 1 50       19 read_schedules($opts) : crawl_schedule($opts);
289 1         2 for my $season (keys %{$schedules}) {
  1         5  
290 2         14 debug "NODB schedule SEASON $season";
291             my $existing_game_ids =
292 2 50       13 $opts->{force} ? {} : read_existing_game_ids($season);
293             my $season_schedule = ref $schedules->{$season} eq 'ARRAY'
294             ? $schedules->{$season}
295 2 50       12 : [map(@{$_->{games}}, @{$schedules->{$season}{dates}})];
  0         0  
  0         0  
296 2         4 for my $schedule_game ( @{$season_schedule} ) {
  2         5  
297 2706         6782 my $game = convert_schedule_game($schedule_game);
298 2706 100       6264 next unless $game;
299             next unless ($opts->{stage} && $game->{stage} == $opts->{stage})
300             || (!$opts->{stage}
301 2672 50 33     13222 && ($game->{stage} == $REGULAR || $game->{stage} == $PLAYOFF)
      66        
      33        
      33        
302             );
303 2672 50       5936 next if $existing_game_ids->{$game->{game_id}};
304 2672         6289 push(@games, $game);
305             }
306             }
307 1         1365 @games;
308             }
309              
310             sub get_db_scheduled_games ($$) {
311              
312 0     0 1 0 my $self = shift;
313 0         0 my $opts = shift;
314              
315 0         0 my @games = ();
316             my $existing_game_ids = $opts->{force}
317             ? []
318 0 0       0 : $self->{db}->get_existing_game_ids($opts);
319              
320 0 0       0 if ( !$opts->{no_schedule_crawl} ) {
321 0         0 my $schedules = crawl_schedule($opts);
322 0         0 for my $season ( sort keys %{$schedules} ) {
  0         0  
323 0         0 my $schedule_by_date = {};
324             arrange_schedule_by_date(
325             $schedule_by_date,
326 0         0 $schedules->{$season}
327             );
328 0         0 $self->{db}->insert_schedule(values %{$schedule_by_date});
  0         0  
329             }
330             }
331              
332 0         0 debug scalar(@{$existing_game_ids}) . " total existing games";
  0         0  
333             @games = $self->{db}{dbh}->get_collection('schedule')->find(
334             {
335             game_id => { '$nin' => $existing_game_ids },
336             $opts->{stage} ? ( stage => $opts->{stage}+0 ) : (),
337             season => {
338             '$gte' => $opts->{start_season}+0,
339 0 0       0 '$lte' => $opts->{stop_season} +0,
340             },
341             }
342             )->all();
343 0         0 @games;
344             }
345              
346             sub get_scheduled_games ($$) {
347              
348 0     0 1 0 my $self = shift;
349 0         0 my $opts = shift;
350              
351 0   0     0 $opts->{start_season} ||= $CURRENT_SEASON;
352 0   0     0 $opts->{stop_season} ||= $CURRENT_SEASON;
353              
354             $self->{db}
355 0 0       0 ? $self->get_db_scheduled_games($opts)
356             : get_nodb_scheduled_games($opts);
357             }
358              
359             sub scrape_games ($$;@) {
360              
361 1     1 1 20 my $self = shift;
362 1         7 my $opts = shift;
363 1         10 my @args = @_;
364              
365 1         5 my @games = ();
366 1 50       12 if (@args) {
367 1         5 my @dates = ();
368 1         15 parse_game_args(\@games, \@dates, @args);
369             push(
370             @games,
371             $opts->{no_schedule_crawl}
372 1 0       22 ? get_games_for_dates(@dates)
    50          
373             : $self->get_crawled_games_for_dates( $opts, @dates ),
374             ) if @dates;
375             }
376             else {
377 0         0 @games = $self->get_scheduled_games($opts);
378             }
379 1 50       8 unless (@games) {
380 0         0 print STDERR "No games to crawl found!\n";
381 0         0 return ();
382             }
383 1         6 my @got_games;
384 1   33     21 @games = sort { ($a->{ts} || $a->{game_id}) <=> ($b->{ts} || $b->{game_id}) } @games;
  3   33     52  
385 1         7 for my $game (@games) {
386 3 50 33     21 if ($game->{date} && $game->{date} > strftime("%Y%m%d", localtime)) {
387 0         0 print "Game $game->{_id} is in the future ($game->{date}), wrapping up\n";
388 0         0 last;
389             }
390 3         22 my $crawled_game = crawl_game($game);
391 3         11 push(@got_games, map($_->{file}, values %{$crawled_game->{content}}));
  3         179  
392             }
393 1         14 @got_games;
394             }
395              
396             sub compile_file ($$$$) {
397              
398 13     13 1 23 my $opts = shift;
399 13         29 my $file = shift;
400 13         24 my $game_id = shift;
401 13   50     35 my $type = shift || 'XX';
402              
403 13         43 my $args = { file => $file };
404 13 0 33     63 if (
      33        
405             $BROKEN_FILES{$game_id}->{$type} &&
406             $BROKEN_FILES{$game_id}->{$type} != $UNSYNCHED &&
407             $BROKEN_FILES{$game_id}->{$type} != $NO_EVENTS
408             ) {
409 0         0 print STDERR "File $file is broken, skipping\n";
410 0         0 return undef;
411             }
412 13         33 my $storable = $file;
413 13         87 $storable =~ s/\.([a-z]+)$/.storable/i;
414 13 100 66     328 if (!$opts->{force} && -f $storable && -M $storable < -M $file) {
      66        
415 5         140 print STDERR "File $storable already exists, skipping\n";
416 5         27 return $storable;
417             }
418 8         124 my $report = Sport::Analytics::NHL::Report->new($args);
419 8         71 $report->process();
420 8 50       39 if ($opts->{test}) {
421 0         0 test_boxscore($report, { lc $args->{type} => 1 });
422 0         0 verbose "Ran $TEST_COUNTER->{Curr_Test} tests";
423 0         0 $TEST_COUNTER->{Curr_Test} = 0;
424             }
425 8         61 store $report, $storable;
426 8         23774 debug "Wrote $storable";
427              
428 8         1801 $storable;
429              
430             }
431              
432             sub compile ($$@) {
433              
434 1     1 1 13 my $self = shift;
435 1         6 my $opts = shift;
436 1         9 my @game_ids = @_;
437              
438 1         6 my @storables = ();
439 1         9 for my $game_id (@game_ids) {
440 1         15 $ENV{GS_KEEP_PENL} = 0;
441 1 50       15 if (defined $DEFAULTED_GAMES{$game_id}) {
442 0         0 print STDERR "Skipping defaulted game $game_id\n";
443 0         0 next;
444             }
445 1         24 my @game_files = get_game_files_by_id($game_id, $opts->{data_dir});
446 1 0 33     16 if (
      33        
447             $BROKEN_FILES{$game_id}->{BS} &&
448             $BROKEN_FILES{$game_id}->{BS} == $NO_EVENTS &&
449 0         0 !grep { /PL/ } @game_files
450             ) {
451 0         0 $ENV{GS_KEEP_PENL} = 1;
452             }
453 1         3 for my $game_file (@game_files) {
454 6         32 $game_file =~ m|/([A-Z]{2}).[a-z]{4}$|;
455 6         17 my $type = $1;
456 6 50 33     17 next if ($opts->{doc} && !grep {$_ eq $type} @{$opts->{doc}});
  0         0  
  0         0  
457 6         25 my $storable = compile_file($opts, $game_file, $game_id, $type);
458 6 50       27 push(@storables, $storable) if $storable;
459             }
460             }
461 1         18 return @storables;
462             }
463              
464             sub retrieve_compiled_report ($$$$) {
465              
466 29     29 1 296349 my $opts = shift;
467 29         53 my $game_id = shift;
468 29         73 my $doc = shift;
469 29         60 my $path = shift;
470              
471 29         756 @Sport::Analytics::NHL::Report::RO::ISA = qw(Sport::Analytics::NHL::Report);
472 29         605 @Sport::Analytics::NHL::Report::PL::ISA = qw(Sport::Analytics::NHL::Report);
473 29         567 @Sport::Analytics::NHL::Report::GS::ISA = qw(Sport::Analytics::NHL::Report);
474 29         565 @Sport::Analytics::NHL::Report::ES::ISA = qw(Sport::Analytics::NHL::Report);
475 29         150 my $doc_storable = "$path/$doc.storable";
476 29 100       143 my $doc_source = "$path/$doc." . ($doc eq 'BS' ? 'json' : 'html');
477              
478 29         174 debug "Looking for file $doc_storable or $doc_source";
479 29 100 66     913 return retrieve $doc_storable if -f $doc_storable && ! $opts->{recompile};
480 13 100       59 if ($opts->{no_compile}) {
481 1         31 print STDERR "$doc: No storable file and no-compile option specified, skipping\n";
482 1         823 return undef;
483             }
484 12 100       176 if (! -f $doc_source) {
485 5         141 print STDERR "$doc: No storable and no source report available, skipping\n";
486 5         825 return undef;
487             }
488 7         43 debug "Compiling $doc_source";
489 7         30 $doc_storable = compile_file($opts, $doc_source, $game_id, $doc);
490 7 50       71 retrieve $doc_storable if $doc_storable;
491             }
492              
493             sub merge ($$@) {
494              
495 12     12 1 250523 my $self = shift;
496 12         34 my $opts = shift;
497 12         47 my @game_ids = @_;
498              
499 12         35 my @storables = ();
500              
501 12         55 for my $game_id (@game_ids) {
502 12 50       67 if (defined $DEFAULTED_GAMES{$game_id}) {
503 0         0 print STDERR "Skipping defaulted game $game_id\n";
504 0         0 next;
505             }
506 12         91 my $path = get_game_path_from_id($game_id, $opts->{data_dir});
507 12         70 my $merged = "$path/$MERGED_FILE";
508 12 100 66     256 if (! $opts->{force} && -f $merged) {
509 10         731 print STDERR "Merged file $merged already exists, skipping\n";
510 10         70 push(@storables, $merged);
511 10         43 next;
512             }
513 2   50     20 $opts->{doc} ||= [];
514 2         7 $opts->{doc} = [qw(PL RO GS ES)];
515 2         11 my $boxscore = retrieve_compiled_report($opts, $game_id, 'BS', $path);
516 2         3819 $boxscore->{sources} = {BS => 1};
517 2 50       10 next unless $boxscore;
518 2         13 $boxscore->build_resolve_cache();
519 2         29 $boxscore->set_event_extra_data();
520 2         5 for my $doc (@{$opts->{doc}}) {
  2         9  
521 8         26 my $report = retrieve_compiled_report($opts, $game_id, $doc, $path);
522 8 100       5864 merge_report($boxscore, $report) if $report;
523             }
524 2 50       13 if ($opts->{test}) {
525 0         0 test_merged_boxscore($boxscore);
526 0         0 verbose "Ran $TEST_COUNTER->{Curr_Test} tests";
527 0         0 $TEST_COUNTER->{Curr_Test} = 0;
528             }
529 2         24 debug "Storing $merged";
530 2         16 store($boxscore, $merged);
531 2         6876 push(@storables, $merged)
532             }
533 12         76 return @storables;
534             }
535              
536             sub check_consistency ($$$;$) {
537              
538 0     0 1 0 my $merged_file = shift;
539 0         0 my $boxscore = shift;
540 0         0 my $event_summary = shift;
541              
542 0         0 my $to_die = 0;
543 0         0 my $loop = 1;
544              
545 0         0 my $frozen_event_summary = $event_summary;
546 0         0 while ($loop) {
547 0         0 $event_summary = dclone $frozen_event_summary;
548 0         0 eval {
549             test_consistency($boxscore, $event_summary)
550             unless $BROKEN_FILES{$boxscore->{_id}}->{BS}
551 0 0 0     0 && keys(%{$boxscore->{sources}}) <= 1;
  0         0  
552             };
553 0 0       0 if ($@) {
554 0         0 my $error = $@;
555 0         0 my $path = dirname($merged_file);
556 0         0 unlink for glob("$path/*.storable");
557 0 0       0 die $error if $to_die == 1;
558 0         0 print STDERR "Trying to fix error: $error";
559 0 0       0 if ($error =~ /team.*(0|1).*playergo.*consistent: (\d+) vs (\d+)/i) {
560 0         0 verbose "Fixing team playergoals";
561 0         0 my $t = $1;
562 0         0 fix_playergoals($boxscore, $t, $event_summary);
563 0         0 store $boxscore, $merged_file;
564 0         0 $to_die = 1;
565 0         0 next;
566             }
567             else {
568 0         0 $error =~ /(\d{7})/; my $player = $1;
  0         0  
569 0 0       0 die $error if $to_die == $player;
570 0 0 0     0 if ($boxscore->{season} < 1945 && $error =~ /assists/) {
    0          
    0          
571 0         0 $error =~ / (\d{7}).* (\d) vs (\d)/;
572 0 0       0 if ($2 == $3 + 1) {
573 0         0 set_player_stat($boxscore, $1, 'assists', $3);
574 0         0 store $boxscore, $merged_file;
575             }
576             }
577             elsif ($error =~ /goalsAgainst/) {
578 0         0 $error =~ / (\d{7}).* (\d+) vs (\d+)/;
579 0         0 set_player_stat($boxscore, $1, 'goalsAgainst', $3);
580 0         0 store $boxscore, $merged_file;
581             }
582             elsif ($error =~ /penaltyMinutes/ ) {
583 0         0 $error =~ / (\d{7}).* (\d+) vs (\d+)/;
584             my $result = set_player_stat(
585             $boxscore, $1, 'penaltyMinutes', $3,
586             $event_summary->{$1}{_servedbyMinutes},
587 0   0     0 ) || 0;
588 0 0       0 store $boxscore, $merged_file unless $result;
589             }
590 0         0 $to_die = $player;
591             }
592             }
593             else {
594 0         0 $loop = 0;
595             }
596             }
597             }
598              
599             sub normalize ($$@) {
600              
601 2     2 1 1053 my $self = shift;
602 2         8 my $opts = shift;
603 2         15 my @game_ids = @_;
604              
605 2         9 my @storables = ();
606              
607 2         14 for my $game_id (@game_ids) {
608 2 50       21 if (defined $DEFAULTED_GAMES{$game_id}) {
609 0         0 print STDERR "Skipping defaulted game $game_id\n";
610 0         0 next;
611             }
612 2         6 my $repeat = -1;
613 2         8 REPEAT:
614             $repeat++;
615 2         21 my $path = get_game_path_from_id($game_id);
616 2         10 my $normalized = "$path/$NORMALIZED_FILE";
617 2 50 33     67 if (! $opts->{force} && -f $normalized) {
618 0         0 print STDERR "Normalized file $normalized already exists, skipping\n";
619 0         0 push(@storables, $normalized);
620 0         0 next;
621             }
622 2         16 my @merged = $self->merge($opts, $game_id);
623 2         22 my $boxscore = retrieve $merged[0];
624 2   50     6224 $boxscore->{sources}{BS} ||= 1;
625 2 50       7 if (! $boxscore) {
626 0         0 print STDERR "Couldn't retrieve the merged file, skipping";
627 0         0 next;
628             }
629 2         21 my $event_summary = summarize($boxscore);
630 2 50       8 if ($opts->{test}) {
631 0         0 check_consistency($merged[0], $boxscore, $event_summary);
632 0         0 verbose "Ran $TEST_COUNTER->{Curr_Test} tests";
633 0         0 $TEST_COUNTER->{Curr_Test} = 0;
634             }
635 2         4 eval {
636 2         9 normalize_boxscore($boxscore, 1);
637             };
638 2 50       7 if ($@) {
639 0         0 unlink for glob("$path/*.storable");
640 0         0 die $@;
641             }
642 2 50       6 if ($opts->{test}) {
643 0         0 eval {
644 0         0 test_normalized_boxscore($boxscore);
645             };
646 0 0       0 if ($@) {
647 0         0 unlink $merged[0];
648 0 0       0 goto REPEAT if ! $repeat;
649             }
650 0         0 verbose "Ran $TEST_COUNTER->{Curr_Test} tests";
651 0         0 $TEST_COUNTER->{Curr_Test} = 0;
652             }
653 2         13 debug "Storing $normalized";
654 2         119 my $json = JSON->new()->pretty(1)->allow_nonref->convert_blessed;
655 2         115 write_file($json->encode($boxscore), "$path/$NORMALIZED_JSON");
656 2         49 store($boxscore, $normalized);
657 2         5200 push(@storables, $normalized);
658             }
659 2         20 return @storables;
660             }
661              
662             =head1 AUTHOR
663              
664             More Hockey Stats, C<< >>
665              
666             =head1 BUGS
667              
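668             Please report any bugs or feature requests to C<bug-sport-analytics-nhl at rt.cpan.org>, or through
669             the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Sport-Analytics-NHL>. I will be notified, and then you'll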
670             automatically be notified of progress on your bug as I make changes.
671              
672              
673              
674              
675             =head1 SUPPORT
676              
677             You can find documentation for this module with the perldoc command.
678              
679             perldoc Sport::Analytics::NHL
680              
681             You can also look for information at:
682              
683             =over 4
684              
685             =item * RT: CPAN's request tracker (report bugs here)
686              
687             L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Sport-Analytics-NHL>
688              
689             =item * AnnoCPAN: Annotated CPAN documentation
690              
691             L<http://annocpan.org/dist/Sport-Analytics-NHL>
692              
693             =item * CPAN Ratings
694              
695             L<http://cpanratings.perl.org/d/Sport-Analytics-NHL>
696              
697             =item * Search CPAN
698              
699             L<http://search.cpan.org/dist/Sport-Analytics-NHL/>
700              
701             =back
702              
703              
704             =head1 ACKNOWLEDGEMENTS
705              
706              
707             =head1 LICENSE AND COPYRIGHT
708              
709             Copyright 2018 More Hockey Stats.
710              
711             This program is released under the following license: gnu
712              
713              
714             =cut
715              
716             1; # End of Sport::Analytics::NHL