line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Sport::Analytics::NHL::Scraper; |
2
|
|
|
|
|
|
|
|
3
|
33
|
|
|
33
|
|
120659
|
use v5.10.1; |
|
33
|
|
|
|
|
124
|
|
4
|
33
|
|
|
33
|
|
169
|
use warnings FATAL => 'all'; |
|
33
|
|
|
|
|
63
|
|
|
33
|
|
|
|
|
1343
|
|
5
|
33
|
|
|
33
|
|
176
|
use strict; |
|
33
|
|
|
|
|
66
|
|
|
33
|
|
|
|
|
783
|
|
6
|
33
|
|
|
33
|
|
157
|
use experimental qw(smartmatch); |
|
33
|
|
|
|
|
55
|
|
|
33
|
|
|
|
|
233
|
|
7
|
33
|
|
|
33
|
|
2704
|
use parent 'Exporter'; |
|
33
|
|
|
|
|
508
|
|
|
33
|
|
|
|
|
202
|
|
8
|
|
|
|
|
|
|
|
9
|
33
|
|
|
33
|
|
16193
|
use Time::HiRes qw(time usleep); |
|
33
|
|
|
|
|
38753
|
|
|
33
|
|
|
|
|
132
|
|
10
|
33
|
|
|
33
|
|
7158
|
use POSIX qw(strftime); |
|
33
|
|
|
|
|
15783
|
|
|
33
|
|
|
|
|
324
|
|
11
|
|
|
|
|
|
|
|
12
|
33
|
|
|
33
|
|
7392
|
use JSON; |
|
33
|
|
|
|
|
32044
|
|
|
33
|
|
|
|
|
251
|
|
13
|
33
|
|
|
33
|
|
15809
|
use LWP::Simple; |
|
33
|
|
|
|
|
1678830
|
|
|
33
|
|
|
|
|
271
|
|
14
|
|
|
|
|
|
|
|
15
|
33
|
|
|
33
|
|
11808
|
use Sport::Analytics::NHL::LocalConfig; |
|
33
|
|
|
|
|
76
|
|
|
33
|
|
|
|
|
4059
|
|
16
|
33
|
|
|
33
|
|
246
|
use Sport::Analytics::NHL::Config; |
|
33
|
|
|
|
|
67
|
|
|
33
|
|
|
|
|
5822
|
|
17
|
33
|
|
|
33
|
|
1617
|
use Sport::Analytics::NHL::Util; |
|
33
|
|
|
|
|
93
|
|
|
33
|
|
|
|
|
2645
|
|
18
|
33
|
|
|
33
|
|
1454
|
use Sport::Analytics::NHL::Tools; |
|
33
|
|
|
|
|
66
|
|
|
33
|
|
|
|
|
5495
|
|
19
|
33
|
|
|
33
|
|
14731
|
use Sport::Analytics::NHL::Report::BS; |
|
33
|
|
|
|
|
90
|
|
|
33
|
|
|
|
|
45058
|
|
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 NAME |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
Sport::Analytics::NHL::Scraper - Scrape and crawl the NHL website for data |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=head1 SYNOPSIS |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
Scrape and crawl the NHL website for data |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
use Sport::Analytics::NHL::Scraper |
30
|
|
|
|
|
|
|
my $schedules = crawl_schedule({ |
31
|
|
|
|
|
|
|
start_season => 2016, |
32
|
|
|
|
|
|
|
stop_season => 2017 |
33
|
|
|
|
|
|
|
}); |
34
|
|
|
|
|
|
|
... |
35
|
|
|
|
|
|
|
my $contents = crawl_game( |
36
|
|
|
|
|
|
|
{ season => 2011, stage => 2, season_id => 0001 }, # game 2011020001 in NHL accounting |
37
|
|
|
|
|
|
|
{ game_files => [qw(BS PL)], retries => 2 }, |
38
|
|
|
|
|
|
|
); |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head1 IMPORTANT VARIABLE |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
Variable @GAME_FILES contains specific definitions for the report types. Right now only the boxscore javascript has any meaningful non-default definitions; the PB feed seems to have become unavailable. |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=head1 FUNCTIONS |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=over 2 |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=item C<scrape> |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
A wrapper around the LWP::Simple::get() call for retrying and control. |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
Arguments: hash reference containing |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
* url => URL to access |
55
|
|
|
|
|
|
|
* retries => Number of retries |
56
|
|
|
|
|
|
|
* validate => sub reference to validate the download |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
Returns: the content if both download and validation are successful undef otherwise. |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=item C<crawl_schedule> |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
Crawls the NHL schedule. The schedule is accessed through a minimalistic live api first (only works for post-2010 seasons), then through the general /api/ |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
Arguments: hash reference containing |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
* start_season => the first season to crawl |
67
|
|
|
|
|
|
|
* stop_season => the last season to crawl |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Returns: hash reference of seasonal schedules where seasons are the keys, and decoded JSONs are the values. |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=item C<get_game_url_args> |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
Sets the arguments to populate the game URL for a given report type and game |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
Arguments: |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
* document name, currently one of qw(BS PB RO ES GS PL) |
78
|
|
|
|
|
|
|
* game hashref containing |
79
|
|
|
|
|
|
|
- season => YYYY |
80
|
|
|
|
|
|
|
- stage => 2|3 |
81
|
|
|
|
|
|
|
- season ID => NNNN |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
Returns: a configured list of arguments for the URL. |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=item C<crawl_game> |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
Crawls the data for the given game |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
Arguments: |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
game data as hashref: |
92
|
|
|
|
|
|
|
* season => YYYY |
93
|
|
|
|
|
|
|
* stage => 2|3 |
94
|
|
|
|
|
|
|
* season ID => NNNN |
95
|
|
|
|
|
|
|
options hashref: |
96
|
|
|
|
|
|
|
* game_files => hashref of types of reports that are requested |
97
|
|
|
|
|
|
|
* force => 0|1 force overwrite of files already present in the system |
98
|
|
|
|
|
|
|
* retries => N number of the retries for every get call |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=item C<crawl_player> |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
Crawls the data for an NHL player given his NHL id. First, the API call is made, and the JSON is retrieved. Unfortunately, the JSON does not contain the draft information, so another call to the HTML page is made to complete the information. The merged information is stored in a json file at the ROOT_DATA_DIR/players/$ID.json path. |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
Arguments: |
105
|
|
|
|
|
|
|
* player's NHL id |
106
|
|
|
|
|
|
|
* options hashref: |
107
|
|
|
|
|
|
|
- data_dir root data dir location |
108
|
|
|
|
|
|
|
- playerfile_expiration -how long the saved playerfile should be trusted |
109
|
|
|
|
|
|
|
- force - crawl the player regardless |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
Returns: the path to the saved file |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=back |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=cut |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
our $SCHEDULE_JSON = 'http://live.nhle.com/GameData/SeasonSchedule-%s%s.json'; |
118
|
|
|
|
|
|
|
our $SCHEDULE_JSON_API = 'https://statsapi.web.nhl.com/api/v1/schedule?startDate=%s&endDate=%s'; |
119
|
|
|
|
|
|
|
our $HTML_REPORT_URL = 'http://www.nhl.com/scores/htmlreports/%d%d/%s%02d%04d.HTM'; |
120
|
|
|
|
|
|
|
our $PLAYER_URL = 'https://statsapi.web.nhl.com/api/v1/people/%d?expand=person.stats&stats=yearByYear,yearByYearPlayoffs&expand=stats.team&site=en_nhl'; |
121
|
|
|
|
|
|
|
our $SUPP_PLAYER_URL = "https://www.nhl.com/player/%d"; |
122
|
|
|
|
|
|
|
#our $ROTOWORLD_URL = 'http://www.rotoworld.com/teams/injuries/nhl/all/'; |
123
|
|
|
|
|
|
|
#our %ROTO_CENSUS = ( |
124
|
|
|
|
|
|
|
# 'CHRIS TANEV' => 'CHRISTOPHER TANEV', |
125
|
|
|
|
|
|
|
#); |
126
|
|
|
|
|
|
|
our @GAME_FILES = ( |
127
|
|
|
|
|
|
|
{ |
128
|
|
|
|
|
|
|
name => 'BS', |
129
|
|
|
|
|
|
|
pattern => 'https://statsapi.web.nhl.com/api/v1/game/%s/feed/live', |
130
|
|
|
|
|
|
|
extension => 'json', |
131
|
|
|
|
|
|
|
validate => sub { |
132
|
|
|
|
|
|
|
my $json = shift; |
133
|
|
|
|
|
|
|
my $bs = Sport::Analytics::NHL::Report::BS->new($json); |
134
|
|
|
|
|
|
|
return scalar @{$bs->{json}{liveData}{plays}{allPlays}}; |
135
|
|
|
|
|
|
|
1; |
136
|
|
|
|
|
|
|
}, |
137
|
|
|
|
|
|
|
}, |
138
|
|
|
|
|
|
|
{ |
139
|
|
|
|
|
|
|
name => 'PB', |
140
|
|
|
|
|
|
|
pattern => 'http://live.nhle.com/GameData/%s%s/%s/PlayByPlay.json', |
141
|
|
|
|
|
|
|
extension => 'json', |
142
|
|
|
|
|
|
|
disabled => 1, |
143
|
|
|
|
|
|
|
}, |
144
|
|
|
|
|
|
|
{ name => 'ES' }, |
145
|
|
|
|
|
|
|
{ name => 'GS' }, |
146
|
|
|
|
|
|
|
{ name => 'PL' }, |
147
|
|
|
|
|
|
|
{ name => 'RO' }, |
148
|
|
|
|
|
|
|
); |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
our @EXPORT = qw( |
151
|
|
|
|
|
|
|
crawl_schedule crawl_game crawl_player |
152
|
|
|
|
|
|
|
); |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
our $DEFAULT_RETRIES = 3; |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
sub scrape ($) { |
157
|
|
|
|
|
|
|
|
158
|
35
|
|
|
35
|
1
|
109
|
my $opts = shift; |
159
|
35
|
50
|
|
|
|
174
|
die "Can't scrape without a URL" unless defined $opts->{url}; |
160
|
|
|
|
|
|
|
|
161
|
35
|
50
|
|
|
|
173
|
return undef if $ENV{HOCKEYDB_NONET}; |
162
|
35
|
|
33
|
|
|
261
|
$opts->{retries} ||= $DEFAULT_RETRIES; |
163
|
35
|
|
100
|
34
|
|
336
|
$opts->{validate} ||= sub { 1 }; |
|
34
|
|
|
|
|
350
|
|
164
|
|
|
|
|
|
|
|
165
|
35
|
|
|
|
|
170
|
my $now = time; |
166
|
35
|
|
|
|
|
74
|
my $r = 0; |
167
|
35
|
|
|
|
|
76
|
my $content; |
168
|
35
|
|
100
|
|
|
245
|
while (! $content && $r++ < $opts->{retries}) { |
169
|
41
|
|
|
|
|
392
|
debug "Trying ($r/$opts->{retries}) $opts->{url}..."; |
170
|
41
|
|
|
|
|
229
|
$content = get($opts->{url}); |
171
|
41
|
50
|
|
|
|
18799116
|
unless ($opts->{validate}->($content)) { |
172
|
0
|
|
|
|
|
0
|
verbose "$opts->{url} failed validation, retrying"; |
173
|
0
|
|
|
|
|
0
|
$content = undef; |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
} |
176
|
35
|
100
|
|
|
|
728
|
debug sprintf("Retrieved in %.3f seconds", time - $now) if $content; |
177
|
35
|
|
|
|
|
305
|
$content; |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
sub crawl_schedule ($) { |
182
|
|
|
|
|
|
|
|
183
|
5
|
|
|
5
|
1
|
2564
|
my $opts = shift; |
184
|
|
|
|
|
|
|
|
185
|
5
|
|
33
|
|
|
22
|
my $start_season = $opts->{start_season} || $FIRST_SEASON; |
186
|
5
|
|
33
|
|
|
17
|
my $stop_season = $opts->{stop_season} || $CURRENT_SEASON; |
187
|
|
|
|
|
|
|
|
188
|
5
|
|
|
|
|
9
|
my $schedules = {}; |
189
|
5
|
|
|
|
|
23
|
for my $season ($start_season .. $stop_season) { |
190
|
6
|
50
|
|
|
|
21
|
next if grep { $_ == $season } @LOCKOUT_SEASONS; |
|
6
|
|
|
|
|
30
|
|
191
|
6
|
|
|
|
|
14
|
my $schedule_json; |
192
|
6
|
|
|
|
|
25
|
my $schedule_json_file = get_schedule_json_file($season); |
193
|
6
|
100
|
66
|
|
|
175
|
if ($season == $CURRENT_SEASON || ! -f $schedule_json_file) { |
194
|
4
|
|
|
|
|
26
|
my $schedule_json_url = sprintf($SCHEDULE_JSON, $season, $season+1); |
195
|
4
|
|
|
|
|
24
|
$schedule_json = scrape({ url => $schedule_json_url }); |
196
|
4
|
100
|
|
|
|
30
|
if (! $schedule_json) { |
197
|
2
|
|
|
|
|
13
|
my ($start_date, $stop_date) = get_start_stop_date($season); |
198
|
2
|
|
|
|
|
12
|
$schedule_json_url = sprintf($SCHEDULE_JSON_API, $start_date, $stop_date); |
199
|
2
|
|
|
|
|
10
|
$schedule_json = scrape({ url => $schedule_json_url }); |
200
|
2
|
50
|
|
|
|
23
|
if (! $schedule_json) { |
201
|
0
|
|
|
|
|
0
|
verbose "Couldn't download from $schedule_json_url, skipping..."; |
202
|
0
|
|
|
|
|
0
|
next; |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
} |
205
|
4
|
|
|
|
|
25
|
write_file($schedule_json, $schedule_json_file); |
206
|
4
|
50
|
|
|
|
73
|
if (! -f $schedule_json_file) { |
207
|
0
|
|
|
|
|
0
|
print "ERROR: could not find a JSON schedule file, skipping..."; |
208
|
0
|
|
|
|
|
0
|
next; |
209
|
|
|
|
|
|
|
} |
210
|
|
|
|
|
|
|
} |
211
|
6
|
|
66
|
|
|
42
|
$schedule_json ||= read_file($schedule_json_file); |
212
|
6
|
|
|
|
|
28171
|
$schedules->{$season} = decode_json($schedule_json); |
213
|
|
|
|
|
|
|
} |
214
|
5
|
|
|
|
|
5220
|
$schedules; |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
sub get_game_url_args ($$) { |
218
|
|
|
|
|
|
|
|
219
|
24
|
|
|
24
|
1
|
1684
|
my $doc_name = shift; |
220
|
24
|
|
|
|
|
60
|
my $game = shift; |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
my $game_id = sprintf( |
223
|
|
|
|
|
|
|
"%04d%02d%04d", |
224
|
|
|
|
|
|
|
$game->{season}, $game->{stage}, $game->{season_id} |
225
|
24
|
|
|
|
|
182
|
); |
226
|
24
|
|
|
|
|
56
|
my @args; |
227
|
24
|
|
|
|
|
74
|
for ($doc_name) { |
228
|
24
|
|
|
|
|
105
|
when ('BS') { |
229
|
8
|
|
|
|
|
35
|
@args = ($game_id); |
230
|
|
|
|
|
|
|
} |
231
|
16
|
|
|
|
|
49
|
when ('PB') { |
232
|
1
|
|
|
|
|
3
|
@args = ($game->{season}, $game->{season} + 1, $game_id); |
233
|
|
|
|
|
|
|
} |
234
|
15
|
|
|
|
|
37
|
default { |
235
|
|
|
|
|
|
|
@args = ( |
236
|
|
|
|
|
|
|
$game->{season}, $game->{season} + 1, $doc_name, |
237
|
|
|
|
|
|
|
$game->{stage}, $game->{season_id} |
238
|
15
|
|
|
|
|
100
|
); |
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
} |
241
|
24
|
|
|
|
|
98
|
@args; |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
sub crawl_game ($;$) { |
245
|
|
|
|
|
|
|
|
246
|
7
|
|
|
7
|
1
|
6356
|
my $game = shift; |
247
|
7
|
|
50
|
|
|
57
|
my $opts = shift || {}; |
248
|
|
|
|
|
|
|
|
249
|
7
|
|
|
|
|
62
|
my $path = make_game_path($game->{season}, $game->{stage}, $game->{season_id}); |
250
|
7
|
|
|
|
|
28
|
my $contents = {}; |
251
|
7
|
|
|
|
|
69
|
for my $doc (@GAME_FILES) { |
252
|
42
|
100
|
|
|
|
180
|
next if $doc->{disabled}; |
253
|
35
|
50
|
33
|
|
|
140
|
next if $opts->{game_files} && ! $opts->{game_files}{$doc}; |
254
|
35
|
100
|
|
|
|
190
|
next if $game->{season} < $FIRST_REPORT_SEASONS{$doc->{name}}; |
255
|
21
|
|
|
|
|
136
|
my @args = get_game_url_args($doc->{name}, $game); |
256
|
21
|
|
66
|
|
|
154
|
$doc->{pattern} ||= $HTML_REPORT_URL; |
257
|
21
|
|
100
|
|
|
101
|
$doc->{extension} ||= 'html'; |
258
|
21
|
|
|
|
|
128
|
my $file = "$path/$doc->{name}.$doc->{extension}"; |
259
|
21
|
50
|
33
|
|
|
551
|
if (-f $file && ! $opts->{force}) { |
260
|
0
|
|
|
|
|
0
|
print STDERR "[NOTICE] File $file already exists, not crawling\n"; |
261
|
0
|
|
|
|
|
0
|
$contents->{$doc->{name}} = read_file($file); |
262
|
0
|
|
|
|
|
0
|
next; |
263
|
|
|
|
|
|
|
} |
264
|
21
|
|
|
|
|
153
|
my $url = sprintf($doc->{pattern}, @args); |
265
|
|
|
|
|
|
|
my $content = scrape({ |
266
|
|
|
|
|
|
|
url => $url, validate => $doc->{validate}, retries => $opts->{retries} |
267
|
21
|
|
|
|
|
189
|
}); |
268
|
21
|
50
|
|
|
|
170
|
if (! $content) { |
269
|
0
|
|
|
|
|
0
|
print STDERR "[WARNING] Got no content for $game->{season}, $game->{stage}, $game->{season_id}, $doc->{name}\n"; |
270
|
0
|
|
|
|
|
0
|
next; |
271
|
|
|
|
|
|
|
} |
272
|
21
|
100
|
|
|
|
16153
|
$content =~ s/\xC2\xA0/ /g unless $doc->{extension} eq 'json'; |
273
|
21
|
|
|
|
|
145
|
write_file($content, $file); |
274
|
21
|
|
|
|
|
262
|
$contents->{$doc->{name}} = {content => $content, file => $file}; |
275
|
|
|
|
|
|
|
} |
276
|
7
|
|
|
|
|
44
|
$contents; |
277
|
|
|
|
|
|
|
} |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
sub crawl_player ($;$$$) { |
280
|
|
|
|
|
|
|
|
281
|
5
|
|
|
5
|
1
|
14979
|
my $id = shift; |
282
|
5
|
|
|
|
|
14
|
my $opts = shift; |
283
|
|
|
|
|
|
|
|
284
|
5
|
|
33
|
|
|
72
|
$opts->{data_dir} ||= $ENV{HOCKEYDB_DATA_DIR} || $DATA_DIR; |
|
|
|
33
|
|
|
|
|
285
|
5
|
|
33
|
|
|
40
|
$opts->{playerfile_expiration} ||= $DEFAULT_PLAYERFILE_EXPIRATION; |
286
|
5
|
|
|
|
|
13
|
my $sfx = 'json'; |
287
|
5
|
|
|
|
|
30
|
my $file = sprintf("%s/players/%d.%s", $opts->{data_dir}, $id, $sfx); |
288
|
|
|
|
|
|
|
|
289
|
5
|
50
|
66
|
|
|
160
|
if (-f $file && -M $file < $opts->{playerfile_expiration} && ! $opts->{force}) { |
|
|
|
66
|
|
|
|
|
290
|
0
|
|
|
|
|
0
|
debug "File exists and is recent, skipping"; |
291
|
0
|
|
|
|
|
0
|
return $file; |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
|
294
|
5
|
|
|
|
|
56
|
my $content = scrape({ url => sprintf($PLAYER_URL, $id) }); |
295
|
5
|
100
|
|
|
|
42
|
if (! $content) { |
296
|
1
|
|
|
|
|
406
|
print STDERR "ID $id missing or network unavailable\n"; |
297
|
1
|
50
|
|
|
|
24
|
if (-f $file) { |
298
|
0
|
|
|
|
|
0
|
print STDERR "Using old available file\n"; |
299
|
0
|
|
|
|
|
0
|
return $file; |
300
|
|
|
|
|
|
|
} |
301
|
1
|
|
|
|
|
10
|
return; |
302
|
|
|
|
|
|
|
} |
303
|
4
|
|
|
|
|
2489
|
my $json = decode_json($content); |
304
|
4
|
|
|
|
|
18
|
$json = $json->{people}[0]; |
305
|
4
|
100
|
|
|
|
85
|
if (-f $file) { |
306
|
1
|
|
|
|
|
7
|
my $existing_json = decode_json(read_file($file)); |
307
|
1
|
|
|
|
|
7
|
for my $key (qw(draftyear draftteam round undrafted pick)) { |
308
|
5
|
50
|
|
|
|
163
|
$json->{$key} = $existing_json->{$key} if exists $existing_json->{$key}; |
309
|
|
|
|
|
|
|
} |
310
|
|
|
|
|
|
|
} |
311
|
|
|
|
|
|
|
else { |
312
|
3
|
|
|
|
|
18
|
my $supp_url = sprintf($SUPP_PLAYER_URL, $id); |
313
|
3
|
|
|
|
|
18
|
$content = scrape({url => $supp_url}); |
314
|
3
|
100
|
|
|
|
430
|
if ($content =~ /Draft:.*(\d{4}) (\S{3}), (\d+)\S\S rd, .* pk \((\d+)\D+ overall\)/) { |
315
|
2
|
|
|
|
|
16
|
$json->{draftyear} = $1+0; |
316
|
2
|
|
|
|
|
10
|
$json->{draftteam} = $2; |
317
|
2
|
|
|
|
|
9
|
$json->{round} = $3+0; |
318
|
2
|
|
|
|
|
7
|
$json->{undrafted} = 0; |
319
|
2
|
|
|
|
|
10
|
$json->{pick} = $4+0; |
320
|
|
|
|
|
|
|
} |
321
|
|
|
|
|
|
|
else { |
322
|
1
|
|
|
|
|
11
|
$json->{undrafted} = 1; |
323
|
1
|
|
|
|
|
6
|
$json->{pick} = $UNDRAFTED_PICK; |
324
|
|
|
|
|
|
|
} |
325
|
|
|
|
|
|
|
} |
326
|
4
|
|
|
|
|
2645
|
write_file(encode_json($json), $file); |
327
|
4
|
|
|
|
|
749
|
$file; |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
1; |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
=head1 AUTHOR |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
More Hockey Stats, C<< >> |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
=head1 BUGS |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
Please report any bugs or feature requests to C, or through |
339
|
|
|
|
|
|
|
the web interface at L. I will be notified, and then you'll |
340
|
|
|
|
|
|
|
automatically be notified of progress on your bug as I make changes. |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
=head1 SUPPORT |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
perldoc Sport::Analytics::NHL::Scraper |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
You can also look for information at: |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
=over 4 |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here) |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
L |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
L |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
=item * CPAN Ratings |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
L |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
=item * Search CPAN |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
L |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
=back |