File Coverage

blib/lib/Sport/Analytics/NHL.pm
Criterion Covered Total %
statement 246 392 62.7
branch 45 126 35.7
condition 32 101 31.6
subroutine 33 37 89.1
pod 15 15 100.0
total 371 671 55.2


line stmt bran cond sub pod time code
1             package Sport::Analytics::NHL;
2              
3 25     25   699961 use v5.10.1;
  25         189  
4 25     25   143 use strict;
  25         53  
  25         611  
5 25     25   142 use warnings FATAL => 'all';
  25         47  
  25         978  
6 25     25   714 use experimental qw(smartmatch);
  25         4052  
  25         165  
7              
8 25     25   1458 use File::Basename;
  25         131  
  25         1839  
9 25     25   7285 use Storable qw(store retrieve dclone);
  25         39820  
  25         1585  
10 25     25   5893 use POSIX qw(strftime);
  25         74411  
  25         191  
11              
12 25     25   25009 use List::MoreUtils qw(uniq);
  25         142875  
  25         198  
13 25     25   26878 use JSON -convert_blessed_universally;
  25         132373  
  25         444  
14              
15 25     25   12845 use Sport::Analytics::NHL::LocalConfig;
  25         61  
  25         2946  
16 25     25   147 use Sport::Analytics::NHL::Config;
  25         46  
  25         3940  
17 25     25   6539 use Sport::Analytics::NHL::Errors;
  25         157  
  25         5159  
18              
19 25   33 25   8135 use if ! $ENV{HOCKEYDB_NODB} && $MONGO_DB, 'Sport::Analytics::NHL::DB';
  25         182  
  25         281  
20 25     25   10914 use Sport::Analytics::NHL::Merger;
  25         89  
  25         1417  
21 25     25   11589 use Sport::Analytics::NHL::Normalizer;
  25         87  
  25         1604  
22 25     25   10968 use Sport::Analytics::NHL::Populator;
  25         62  
  25         1218  
23 25     25   151 use Sport::Analytics::NHL::Report;
  25         49  
  25         503  
24 25     25   116 use Sport::Analytics::NHL::Scraper;
  25         54  
  25         1002  
25 25     25   136 use Sport::Analytics::NHL::Test;
  25         48  
  25         3876  
26 25     25   143 use Sport::Analytics::NHL::Tools;
  25         47  
  25         3653  
27 25     25   161 use Sport::Analytics::NHL::Util;
  25         48  
  25         1440  
28              
29 25     25   144 use parent 'Exporter';
  25         62  
  25         94  
30              
31             =head1 NAME
32              
33             Sport::Analytics::NHL - Crawl data from NHL.com and put it into a database
34              
35             =head1 VERSION
36              
37             Version 1.40
38              
39             =cut
40              
41             our @EXPORT = qw(
42             hdb_version
43             );
44              
45             our $VERSION = "1.40";
46              
47             =head1 SYNOPSIS
48              
49             Crawl data from NHL.com and put it into a database.
50              
51             Crawls the NHL.com website, processes the game reports and stores them into a Mongo database or into the filesystem.
52              
53             use Sport::Analytics::NHL;
54              
55             my $nhl = Sport::Analytics::NHL->new();
56             $nhl->scrape_games();
57             ...
58             # more functionality to be added in later releases.
59              
60             =head1 EXPORT
61              
62             hdb_version() - reports the version. The rest of the interface is object-oriented and is accessed via the new() constructor.
63              
64             =cut
65              
66             sub hdb_version () {
67              
68 1     1 1 1204 $VERSION;
69             }
70              
71             =head1 METHODS
72              
73             =over 2
74              
75             =item C<hdb_version()>
76              
77             Returns the current version of the package
78              
79             =item C<new()>
80              
81             Returns a new Sport::Analytics::NHL object. If a Mongo DB is configured, the connection to the database is established, and the handle is stored in the object.
82              
83             =item C<parse_game_args()>
84              
85             Parses various game arguments to the scrape_games() method:
86             * NHL IDs of format SSSS0TIIII (2016020201)
87             * Our IDs of format SSSSTIIII (201620201)
88             * Dates in format YYYYMMDD (20160202)
89              
90             where S stands for starting year of season, T - stage (2 - regular, 3 - playoffs), I - the ID of the game within the year.
91              
92             Modifies the games array reference passed as the first argument, and dates array reference passed as the second argument, using the list of number strings as the remaining list of arguments.
93              
94             =item C<get_crawled_games_for_dates()>
95              
96             Gets a list of already crawled games on given list of dates. Crawls the season schedule on the NHL website if necessary.
97             Arguments: the options to pass to the scraper that crawls and the list of the dates.
98             Returns: the list of game structures which are hash references with the following fields:
99             * season
100             * stage
101             * season id
102             * Our game ID (see the previous section)
103              
104             =item C<get_nodb_scheduled_games()>
105              
106             Gets a list of scheduled, uncrawled games in the filesystem, based on the schedules already stored in, or crawled into the system.
107             Argument: options hashref that specifies whether new schedules should be crawled, and whether only a specific stage should be selected.
108             Returns: the list of game structures which are hash references with the following fields:
109             * season
110             * stage
111             * season id
112             * Our game ID (see the previous section)
113              
114             =item C<get_db_scheduled_games()>
115              
116             Same as the previous method, but the information is extracted from the Mongo database rather than the filesystem.
117              
118             =item C<get_scheduled_games()>
119              
120             The generic wrapper for the two previous methods.
121              
122             =item C<scrape_games()>
123              
124             Scrape the games reports from the NHL website and store them in files on the disk.
125             Arguments: the hashref of options for the scrape -
126             * no_schedule_crawl - if set, a fresh schedule is not crawled (the already stored schedules are used instead)
127             * start_season - the first season to start scraping from (default - the current season)
128             * stop_season - the last season to scrape (default - ongoing)
129             * stage - 2 for Regular, 3 for Playoffs, none for both (default - none)
130             * force - override the already present files and data
131              
132             =item C<compile_file()>
133              
134             Compiles a single JSON or HTML report into a parsed hashref and stores it in a Storable file
135             Arguments:
136             * The options hashref -
137             - force: Force overwrite of already existing file
138             - test: Test the resulting parsed report
139             * The file
140             * Our SSSSTIIII game id
141             * Optional: preset type of the report
142              
143             Returns: the path to the compiled file
144              
145             =item C<compile()>
146              
147             Compiles reports retrieved into the filesystem into parsed hashrefs and stores them in a Storable file.
148             Arguments:
149             * The options hashref -
150             - force: Force overwrite of already existing file
151             - test: Test the resulting parsed report
152             - doc: limit compilation to these Report types
153             - data_dir: the root directory of the reports
154             * The list of game ids
155              
156             Returns: the location of the compiled storables
157              
158             =item C<retrieve_compiled_report()>
159              
160             Retrieves the compiled storable file for the given game ID and file type.
161             Compiles the file anew unless explicitly prohibited from doing so.
162              
163             Arguments:
164             * The options hashref -
165             - no_compile: don't compile files if required
166             - recompile: force recompilation
167             * game ID
168             * doc type (e.g. BS, PL, RO, ...)
169             * path to the storable file.
170             The file is expected at location $path/$doc.storable
171              
172             Returns: the file structure retrieved from storable, or undef.
173              
174             =item C<merge()>
175              
176             Merges reports compiled in the filesystem into one boxscore hashref and stores it in a Storable file.
177              
178             Arguments:
179             * The options hashref -
180             - force: Force overwrite of already existing file
181             - test: Test the resulting parsed report
182             - doc: limit compilation to these Report types
183             - data_dir: the root directory of the reports
184             - no_compile: don't compile files if required
185             - recompile: force recompilation
186             * The list of game ids
187              
188             Returns: the location of the merged storable
189              
190             =item C<check_consistency()>
191              
192             Checks the consistency between the summarized events and the summary data in the boxscore itself. If there are inconsistencies, the game files are recompiled and remerged and some fixes are attempted. If there are unfixable inconsistencies, the check dies.
193              
194             Arguments:
195             * The merged file (to manage the game files)
196             * The boxscore to summarize
197             * The produced summary of events
198              
199             Returns: void. Dies if something goes wrong.
200              
201             =item C<normalize()>
202              
203             Normalizes the merged boxscore, providing default values and erasing unnecessary data from the boxscore data structure. Saves the normalized boxscore both as a Perl storable and as a JSON. This is the highest level of integration that this package provides without a database (Mongo) interface.
204              
205              
206             Arguments:
207             * The options hashref -
208             - force: Force overwrite of already existing file
209             - test: Test the resulting parsed report
210             - doc: limit compilation to these Report types
211             - data_dir: the root directory of the reports
212             - no_compile: don't compile files if required
213             - recompile: force recompilation
214             - no_merge: don't merge files if required
215             - remerge: force remerging
216             * The list of game ids
217              
218             Returns: the location of the normalized storable(s). The JSON would be in the same directory.
219              
220             =item C<populate()>
221              
222             Populates the Mongo DB from the normalized boxscores. Normalizes the boxscore if necessary and if requested.
223              
224              
225             Arguments:
226             * The options hashref -
227             - same options as normalize() (q.v.) plus:
228             - no_normalize: don't normalize files if required
229             - renormalize: force normalizing.
230             * The list of the normalized game ids
231              
232             Returns: the list of ids of the games that were inserted into, or were already present in, the database.
233              
234             =back
235              
236             =cut
237              
238             sub new ($$) {
239              
240 8     8 1 118813 my $class = shift;
241 8         64 my $opts = shift;
242              
243 8         72 my $self = {};
244 8 50 100     239 unless ($opts->{no_database} || $ENV{HOCKEYDB_NODB} || ! $MONGO_DB) {
      66        
245 0   0     0 $self->{db} = Sport::Analytics::NHL::DB->new($opts->{database} || $ENV{HOCKEYDB_DBNAME} || $MONGO_DB);
246             }
247 8 100       95 $ENV{HOCKEYDB_DATA_DIR} = $DATA_DIR = $opts->{data_dir} if $opts->{data_dir};
248 8         56 bless $self, $class;
249 8         68 $self;
250             }
251              
252             sub parse_game_args ($$@) {
253              
254 2     2 1 1207 my $games = shift;
255 2         5 my $dates = shift;
256 2         10 my @args = @_;
257              
258 2         11 for (@args) {
259 9         14 my $game = {};
260 9         26 when (/^\d{10}$/) { $game = parse_nhl_game_id($_); push(@{$games}, $game) }
  2         5  
  2         3  
  2         4  
261 7         28 when (/^\d{9}$/ ) { $game = parse_our_game_id($_); push(@{$games}, $game) }
  5         35  
  5         16  
  5         17  
262 2         4 when (/^\d{8}$/ ) { push(@{$dates}, $_) }
  2         4  
  2         6  
263 0         0 default { warn "[WARNING] Unrecognized argument $_, skipping\n" }
  0         0  
264             }
265             }
266              
267             sub get_crawled_games_for_dates ($$@) {
268              
269 1     1 1 5 my $self = shift;
270 1         2 my $opts = shift;
271 1         2 my @dates = @_;
272              
273 1         2 my $schedules = {};
274 1         2 my $schedule_by_date = {};
275 1         2 my @games = ();
276 1         3 for my $date (@dates) {
277             $opts->{start_season} = $opts->{stop_season} =
278 4         22 get_season_from_date($date);
279 4 100       20 unless ($schedules->{$opts->{start_season}}) {
280 3         17 $schedules = crawl_schedule($opts);
281             arrange_schedule_by_date(
282             $schedule_by_date,
283             $schedules->{$opts->{start_season}}
284 3         30 );
285 0         0 $self->{db}->insert_schedule(values %{$schedule_by_date})
286 3 50       29 if $self->{db};
287             }
288 4 50       19 unless ($schedule_by_date->{$date}) {
289 0         0 print STDERR "No games scheduled for $date, skipping...\n";
290 0         0 next;
291             }
292 4         9 push(@games, @{$schedule_by_date->{$date}});
  4         19  
293             }
294 1         3119 @games;
295             }
296              
297             sub get_nodb_scheduled_games ($) {
298              
299 1     1 1 1641 my $opts = shift;
300              
301 1         3 my @games = ();
302             my $schedules = $opts->{no_schedule_crawl} ?
303 1 50       7 read_schedules($opts) : crawl_schedule($opts);
304 1         3 for my $season (keys %{$schedules}) {
  1         3  
305 2         16 debug "NODB schedule SEASON $season";
306             my $existing_game_ids =
307 2 50       14 $opts->{force} ? {} : read_existing_game_ids($season);
308             my $season_schedule = ref $schedules->{$season} eq 'ARRAY'
309             ? $schedules->{$season}
310 2 50       11 : [map(@{$_->{games}}, @{$schedules->{$season}{dates}})];
  0         0  
  0         0  
311 2         4 for my $schedule_game ( @{$season_schedule} ) {
  2         5  
312 2706         5640 my $game = convert_schedule_game($schedule_game);
313 2706 100       5692 next unless $game;
314             next unless ($opts->{stage} && $game->{stage} == $opts->{stage})
315             || (!$opts->{stage}
316 2672 50 33     13169 && ($game->{stage} == $REGULAR || $game->{stage} == $PLAYOFF)
      66        
      33        
      33        
317             );
318 2672 50       6064 next if $existing_game_ids->{$game->{game_id}};
319 2672         5988 push(@games, $game);
320             }
321             }
322 1         1287 @games;
323             }
324              
325             sub get_db_scheduled_games ($$) {
326              
327 0     0 1 0 my $self = shift;
328 0         0 my $opts = shift;
329              
330 0         0 my @games = ();
331             my $existing_game_ids = $opts->{force}
332             ? []
333 0 0       0 : $self->{db}->get_existing_game_ids($opts);
334              
335 0 0       0 if ( !$opts->{no_schedule_crawl} ) {
336 0         0 my $schedules = crawl_schedule($opts);
337 0         0 for my $season ( sort keys %{$schedules} ) {
  0         0  
338 0         0 my $schedule_by_date = {};
339             arrange_schedule_by_date(
340             $schedule_by_date,
341 0         0 $schedules->{$season}
342             );
343 0         0 $self->{db}->insert_schedule(values %{$schedule_by_date});
  0         0  
344             }
345             }
346              
347 0         0 debug scalar(@{$existing_game_ids}) . " total existing games";
  0         0  
348             @games = $self->{db}{dbh}->get_collection('schedule')->find(
349             {
350             game_id => { '$nin' => $existing_game_ids },
351             $opts->{stage} ? ( stage => $opts->{stage}+0 ) : (),
352             season => {
353             '$gte' => $opts->{start_season}+0,
354 0 0       0 '$lte' => $opts->{stop_season} +0,
355             },
356             }
357             )->all();
358 0         0 @games;
359             }
360              
361             sub get_scheduled_games ($$) {
362              
363 0     0 1 0 my $self = shift;
364 0         0 my $opts = shift;
365              
366 0   0     0 $opts->{start_season} ||= $CURRENT_SEASON;
367 0   0     0 $opts->{stop_season} ||= $CURRENT_SEASON;
368              
369             $self->{db}
370 0 0       0 ? $self->get_db_scheduled_games($opts)
371             : get_nodb_scheduled_games($opts);
372             }
373              
374             sub scrape_games ($$;@) {
375              
376 1     1 1 19 my $self = shift;
377 1         7 my $opts = shift;
378 1         15 my @args = @_;
379              
380 1         13 my @games = ();
381 1 50       31 if (@args) {
382 1         10 my @dates = ();
383 1         14 parse_game_args(\@games, \@dates, @args);
384             push(
385             @games,
386             $opts->{no_schedule_crawl}
387 1 0       24 ? get_games_for_dates(@dates)
    50          
388             : $self->get_crawled_games_for_dates( $opts, @dates ),
389             ) if @dates;
390             }
391             else {
392 0         0 @games = $self->get_scheduled_games($opts);
393             }
394 1 50       11 unless (@games) {
395 0         0 print STDERR "No games to crawl found!\n";
396 0         0 return ();
397             }
398 1         3 my @got_games;
399 1   33     36 @games = sort { ($a->{ts} || $a->{game_id}) <=> ($b->{ts} || $b->{game_id}) } @games;
  3   33     44  
400 1         10 for my $game (@games) {
401 3 50 33     34 if ($game->{date} && $game->{date} > strftime("%Y%m%d", localtime)) {
402 0         0 print "Game $game->{_id} is in the future ($game->{date}), wrapping up\n";
403 0         0 last;
404             }
405 3         27 my $crawled_game = crawl_game($game);
406 3         12 push(@got_games, map($_->{file}, values %{$crawled_game->{content}}));
  3         237  
407             }
408 1         16 @got_games;
409             }
410              
411             sub compile_file ($$$$) {
412              
413 19     19 1 50 my $opts = shift;
414 19         47 my $file = shift;
415 19         34 my $game_id = shift;
416 19   50     83 my $type = shift || 'XX';
417              
418 19         69 my $args = { file => $file };
419 19 0 33     137 if (
      33        
420             $BROKEN_FILES{$game_id}->{$type} &&
421             $BROKEN_FILES{$game_id}->{$type} != $UNSYNCHED &&
422             $BROKEN_FILES{$game_id}->{$type} != $NO_EVENTS
423             ) {
424 0         0 print STDERR "File $file is broken, skipping\n";
425 0         0 return undef;
426             }
427 19         46 my $storable = $file;
428 19         139 $storable =~ s/\.([a-z]+)$/.storable/i;
429 19 50 33     451 if (!$opts->{force} && ! $opts->{recompile} && -f $storable && -M $storable < -M $file) {
      33        
      33        
430 0         0 print STDERR "File $storable already exists, skipping\n";
431 0         0 return $storable;
432             }
433 19         165 my $report = Sport::Analytics::NHL::Report->new($args);
434 19         170 $report->process();
435 19 50       113 if ($opts->{test}) {
436 0         0 test_boxscore($report, { lc $args->{type} => 1 });
437 0         0 verbose "Ran $TEST_COUNTER->{Curr_Test} tests";
438 0         0 $TEST_COUNTER->{Curr_Test} = 0;
439             }
440 19         164 store $report, $storable;
441 19         79218 debug "Wrote $storable";
442              
443 19         5530 $storable;
444              
445             }
446              
447             sub compile ($$@) {
448              
449 1     1 1 18 my $self = shift;
450 1         12 my $opts = shift;
451 1         8 my @game_ids = @_;
452              
453 1         8 my @storables = ();
454 1         9 for my $game_id (@game_ids) {
455 1         21 $ENV{GS_KEEP_PENL} = 0;
456 1 50       15 if (defined $DEFAULTED_GAMES{$game_id}) {
457 0         0 print STDERR "Skipping defaulted game $game_id\n";
458 0         0 next;
459             }
460 1         23 my @game_files = get_game_files_by_id($game_id, $opts->{data_dir});
461 1 0 33     15 if (
      33        
462             $BROKEN_FILES{$game_id}->{BS} &&
463             $BROKEN_FILES{$game_id}->{BS} == $NO_EVENTS &&
464 0         0 !grep { /PL/ } @game_files
465             ) {
466 0         0 $ENV{GS_KEEP_PENL} = 1;
467             }
468 1         7 for my $game_file (@game_files) {
469 6         55 $game_file =~ m|/([A-Z]{2}).[a-z]{4}$|;
470 6         22 my $type = $1;
471 6 50 33     36 next if ($opts->{doc} && !grep {$_ eq $type} @{$opts->{doc}});
  0         0  
  0         0  
472 6         25 my $storable = compile_file($opts, $game_file, $game_id, $type);
473 6 50       46 push(@storables, $storable) if $storable;
474             }
475             }
476 1         19 return @storables;
477             }
478              
479             sub retrieve_compiled_report ($$$$) {
480              
481 39     39 1 384295 my $opts = shift;
482 39         85 my $game_id = shift;
483 39         89 my $doc = shift;
484 39         96 my $path = shift;
485              
486 39         1058 @Sport::Analytics::NHL::Report::RO::ISA = qw(Sport::Analytics::NHL::Report);
487 39         800 @Sport::Analytics::NHL::Report::PL::ISA = qw(Sport::Analytics::NHL::Report);
488 39         767 @Sport::Analytics::NHL::Report::GS::ISA = qw(Sport::Analytics::NHL::Report);
489 39         736 @Sport::Analytics::NHL::Report::ES::ISA = qw(Sport::Analytics::NHL::Report);
490 39         194 my $doc_storable = "$path/$doc.storable";
491 39 100       209 my $doc_source = "$path/$doc." . ($doc eq 'BS' ? 'json' : 'html');
492              
493 39         255 debug "Looking for file $doc_storable or $doc_source";
494 39 100 66     1083 return retrieve $doc_storable if -f $doc_storable && ! $opts->{recompile};
495 23 100       109 if ($opts->{no_compile}) {
496 1         353 print STDERR "$doc: No storable file and no-compile option specified, skipping\n";
497 1         1306 return undef;
498             }
499 22 100       344 if (! -f $doc_source) {
500 9         503 print STDERR "$doc: No storable and no source report available, skipping\n";
501 9         1109 return undef;
502             }
503 13         139 debug "Compiling $doc_source";
504 13         350 $doc_storable = compile_file($opts, $doc_source, $game_id, $doc);
505 13 50       152 retrieve $doc_storable if $doc_storable;
506             }
507              
508             sub merge ($$@) {
509              
510 12     12 1 264548 my $self = shift;
511 12         41 my $opts = shift;
512 12         43 my @game_ids = @_;
513              
514 12         43 my @storables = ();
515              
516 12         47 for my $game_id (@game_ids) {
517 12 50       74 if (defined $DEFAULTED_GAMES{$game_id}) {
518 0         0 print STDERR "Skipping defaulted game $game_id\n";
519 0         0 next;
520             }
521 12         101 my $path = get_game_path_from_id($game_id, $opts->{data_dir});
522 12         70 my $merged = "$path/$MERGED_FILE";
523 12 100 33     289 if (! $opts->{force} && ! $opts->{remerge} && -f $merged) {
      66        
524 8         343 print STDERR "Merged file $merged already exists, skipping\n";
525 8         37 push(@storables, $merged);
526 8         22 next;
527             }
528 4   50     36 $opts->{doc} ||= [];
529 4         17 $opts->{doc} = [qw(PL RO GS ES)];
530 4         21 my $boxscore = retrieve_compiled_report($opts, $game_id, 'BS', $path);
531 4         5973 $boxscore->{sources} = {BS => 1};
532 4 50       17 next unless $boxscore;
533 4         24 $boxscore->build_resolve_cache();
534 4         44 $boxscore->set_event_extra_data();
535 4         6 for my $doc (@{$opts->{doc}}) {
  4         17  
536 16         58 my $report = retrieve_compiled_report($opts, $game_id, $doc, $path);
537 16 100       10360 merge_report($boxscore, $report) if $report;
538             }
539 4 50       197 if ($opts->{test}) {
540 0         0 test_merged_boxscore($boxscore);
541 0         0 verbose "Ran $TEST_COUNTER->{Curr_Test} tests";
542 0         0 $TEST_COUNTER->{Curr_Test} = 0;
543             }
544 4         39 debug "Storing $merged";
545 4         33 store($boxscore, $merged);
546 4         16970 push(@storables, $merged)
547             }
548 12         71 return @storables;
549             }
550              
551             sub check_consistency ($$$;$) {
552              
553 0     0 1 0 my $merged_file = shift;
554 0         0 my $boxscore = shift;
555 0         0 my $event_summary = shift;
556              
557 0         0 my $to_die = 0;
558 0         0 my $loop = 1;
559              
560 0         0 my $frozen_event_summary = $event_summary;
561 0         0 while ($loop) {
562 0         0 $event_summary = dclone $frozen_event_summary;
563 0         0 eval {
564             test_consistency($boxscore, $event_summary)
565             unless $BROKEN_FILES{$boxscore->{_id}}->{BS}
566 0 0 0     0 && keys(%{$boxscore->{sources}}) <= 1;
  0         0  
567             };
568 0 0       0 if ($@) {
569 0         0 my $error = $@;
570 0         0 my $path = dirname($merged_file);
571 0         0 unlink for glob("$path/*.storable");
572 0 0       0 die $error if $to_die == 1;
573 0         0 print STDERR "Trying to fix error: $error";
574 0 0       0 if ($error =~ /team.*(0|1).*playergo.*consistent: (\d+) vs (\d+)/i) {
575 0         0 verbose "Fixing team playergoals";
576 0         0 my $t = $1;
577 0         0 fix_playergoals($boxscore, $t, $event_summary);
578 0         0 store $boxscore, $merged_file;
579 0         0 $to_die = 1;
580 0         0 next;
581             }
582             else {
583 0         0 $error =~ /(\d{7})/; my $player = $1;
  0         0  
584 0 0       0 die $error if $to_die == $player;
585 0 0 0     0 if ($boxscore->{season} < 1945 && $error =~ /assists/) {
    0          
    0          
586 0         0 $error =~ / (\d{7}).* (\d) vs (\d)/;
587 0 0       0 if ($2 == $3 + 1) {
588 0         0 set_player_stat($boxscore, $1, 'assists', $3);
589 0         0 store $boxscore, $merged_file;
590             }
591             }
592             elsif ($error =~ /goalsAgainst/) {
593 0         0 $error =~ / (\d{7}).* (\d+) vs (\d+)/;
594 0         0 set_player_stat($boxscore, $1, 'goalsAgainst', $3);
595 0         0 store $boxscore, $merged_file;
596             }
597             elsif ($error =~ /penaltyMinutes/ ) {
598 0         0 $error =~ / (\d{7}).* (\d+) vs (\d+)/;
599             my $result = set_player_stat(
600             $boxscore, $1, 'penaltyMinutes', $3,
601             $event_summary->{$1}{_servedbyMinutes},
602 0   0     0 ) || 0;
603 0 0       0 store $boxscore, $merged_file unless $result;
604             }
605 0         0 $to_die = $player;
606             }
607             }
608             else {
609 0         0 $loop = 0;
610             }
611             }
612             }
613              
614             sub normalize ($$@) {
615              
616 2     2 1 1437 my $self = shift;
617 2         16 my $opts = shift;
618 2         9 my @game_ids = @_;
619              
620 2         9 my @storables = ();
621              
622 2         7 for my $game_id (@game_ids) {
623 2 50       16 if (defined $DEFAULTED_GAMES{$game_id}) {
624 0         0 print STDERR "Skipping defaulted game $game_id\n";
625 0         0 next;
626             }
627 2         4 my $repeat = -1;
628 2         7 REPEAT:
629             $repeat++;
630 2         25 my $path = get_game_path_from_id($game_id);
631 2         13 my $normalized = "$path/$NORMALIZED_FILE";
632 2 50 33     75 if (! $opts->{force} && ! $opts->{renormalize} && -f $normalized) {
      33        
633 0         0 print STDERR "Normalized file $normalized already exists, skipping\n";
634 0         0 push(@storables, $normalized);
635 0         0 next;
636             }
637 2         15 my @merged = $self->merge($opts, $game_id);
638 2         10 my $boxscore = retrieve $merged[0];
639 2   50     6203 $boxscore->{sources}{BS} ||= 1;
640 2 50       8 if (! $boxscore) {
641 0         0 print STDERR "Couldn't retrieve the merged file, skipping";
642 0         0 next;
643             }
644 2         22 my $event_summary = summarize($boxscore);
645 2 50       11 if ($opts->{test}) {
646 0         0 check_consistency($merged[0], $boxscore, $event_summary);
647 0         0 verbose "Ran $TEST_COUNTER->{Curr_Test} tests";
648 0         0 $TEST_COUNTER->{Curr_Test} = 0;
649             }
650 2         6 eval {
651 2         24 normalize_boxscore($boxscore, 1);
652             };
653 2 50       9 if ($@) {
654 0         0 unlink for glob("$path/*.storable");
655 0         0 die $@;
656             }
657 2 50       8 if ($opts->{test}) {
658 0         0 eval {
659 0         0 test_normalized_boxscore($boxscore);
660             };
661 0 0       0 if ($@) {
662 0         0 unlink $merged[0];
663 0 0       0 goto REPEAT if ! $repeat;
664 0         0 die $@;
665             }
666 0         0 verbose "Ran $TEST_COUNTER->{Curr_Test} tests";
667 0         0 $TEST_COUNTER->{Curr_Test} = 0;
668             }
669 2         20 debug "Storing $normalized";
670 2         82 my $json = JSON->new()->pretty(1)->allow_nonref->convert_blessed;
671 2         91 write_file($json->encode($boxscore), "$path/$NORMALIZED_JSON");
672             # unless $0 =~ /\.t/ && $path !~ /tmp/;
673 2         14 store $boxscore, $normalized;
674 2         6230 push(@storables, $normalized);
675             }
676 2         39 return @storables;
677             }
678              
679             sub populate ($$@) {
680              
681 0     0 1   my $self = shift;
682 0           my $opts = shift;
683 0           my @game_ids = @_;
684              
685 0           my @db_game_ids = ();
686              
687 0 0         if (! $self->{db}) {
688 0           print "You need Mongo DB to populate.\n";
689 0           return ();
690             }
691 0           for my $game_id (@game_ids) {
692 0 0         if (defined $DEFAULTED_GAMES{$game_id}) {
693 0           print STDERR "Skipping defaulted game $game_id\n";
694 0           next;
695             }
696 0           my $db_game = $self->{db}->get_collection('games')->find_one({_id => $game_id+0});
697 0 0 0       if ($db_game && ! $opts->{force}) {
698 0           print STDERR "Game $game_id already present in the database\n";
699 0           push(@db_game_ids, $game_id);
700 0           next;
701             }
702 0           my $path = get_game_path_from_id($game_id);
703 0           my $normalized = "$path/$NORMALIZED_FILE";
704 0 0 0       if ($opts->{no_normalize} && ! -f $normalized) {
705 0           print STDERR "No normalized file and no normalize option specified, skipping\n";
706 0           next;
707             }
708 0 0 0       $self->normalize($opts, $game_id) if ! -f $normalized || $opts->{renormalize};
709 0 0         if (! $normalized) {
710 0           print STDERR "Error normalizing file $normalized, skipping\n";
711 0           next;
712             }
713 0           my $boxscore = retrieve $normalized;
714 0 0         if (! $boxscore) {
715 0           print STDERR "Couldn't retrieve the normalized file, skipping\n";
716 0           next;
717             }
718 0           $opts->{no_norm} = 1;
719 0 0         $opts->{repopulate} = $db_game ? 1 : 0;
720 0           my $db_game_id = populate_db($boxscore, $opts);
721 0           push(@db_game_ids, $db_game_id);
722             }
723 0           @db_game_ids;
724             }
725              
726             =head1 AUTHOR
727              
728             More Hockey Stats, C<< >>
729              
730             =head1 BUGS
731              
732             Please report any bugs or feature requests to C, or through
733             the web interface at L. I will be notified, and then you'll
734             automatically be notified of progress on your bug as I make changes.
735              
736              
737              
738              
739             =head1 SUPPORT
740              
741             You can find documentation for this module with the perldoc command.
742              
743             perldoc Sport::Analytics::NHL
744              
745             You can also look for information at:
746              
747             =over 4
748              
749             =item * RT: CPAN's request tracker (report bugs here)
750              
751             L
752              
753             =item * AnnoCPAN: Annotated CPAN documentation
754              
755             L
756              
757             =item * CPAN Ratings
758              
759             L
760              
761             =item * Search CPAN
762              
763             L
764              
765             =back
766              
767              
768             =head1 ACKNOWLEDGEMENTS
769              
770              
771             =head1 LICENSE AND COPYRIGHT
772              
773             Copyright 2018 More Hockey Stats.
774              
775             This program is released under the following license: gnu
776              
777              
778             =cut
779              
780             1; # End of Sport::Analytics::NHL