line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package IMDB::Local; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Suggestions for improvements |
5
|
|
|
|
|
|
|
# - |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
|
9
|
1
|
|
|
1
|
|
12303
|
use 5.006; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
33
|
|
10
|
1
|
|
|
1
|
|
4
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
26
|
|
11
|
1
|
|
|
1
|
|
3
|
use warnings; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
54
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 NAME |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
IMDB::Local - import IMDb list files (movies, genres, cast) into a local database |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 VERSION |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
Version 1.00 |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=cut |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
our $VERSION = '1.00'; |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=head1 SYNOPSIS |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
Quick summary of what the module does. |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
Perhaps a little code snippet. |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
use IMDB::Local; |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
my $foo = IMDB::Local->new(imdbDir => '/path/to/imdb', verbose => 0); |
35
|
|
|
|
|
|
|
... |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head1 EXPORT |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
A list of functions that can be exported. You can delete this section |
40
|
|
|
|
|
|
|
if you don't export anything, such as for a purely object-oriented module. |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=head1 SUBROUTINES/METHODS |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=cut |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# Use Term::ProgressBar if installed. |
47
|
1
|
|
|
|
|
2
|
use constant Have_bar => eval { |
48
|
1
|
|
|
|
|
180
|
require Term::ProgressBar; |
49
|
0
|
|
|
|
|
0
|
$Term::ProgressBar::VERSION >= 2; |
50
|
1
|
|
|
1
|
|
3
|
}; |
|
1
|
|
|
|
|
1
|
|
51
|
|
|
|
|
|
|
|
52
|
1
|
|
|
1
|
|
340
|
use IMDB::Local::DB; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=head2 new |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
Create new IMDB::Local object. |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
Arguments: |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
imdbDir - required or die |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
verbose - optional, default is 0. |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
listsDir - folder where list files exist (see IMDB::Local::Download). |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
showProgressBar - if non-zero and Term::ProgressBar is available progress bars in import methods will be displayed. Ignored if Term::ProgressBar is not available. |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=cut |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# Constructor.  Every name => value pair passed in is stored verbatim as an
# attribute of the object.
#
# Arguments:
#   imdbDir         - required; dies with "invalid usage - no imdbDir" when
#                     it is missing.
#   verbose         - optional, defaults to 0 (as the POD documents).
#   listsDir        - optional; when defined, an IMDB::Local::ListFiles object
#                     for that folder is stored in {listFiles}.
#   showProgressBar - optional; forced to 0 when Term::ProgressBar (>= 2)
#                     could not be loaded.
#
# Returns the blessed object.
sub new
{
    my $type = shift;
    my $self = { @_ };          # remaining args become attributes

    die "invalid usage - no imdbDir" if ( !defined($self->{imdbDir}) );

    # verbose is documented as optional, so default it instead of dying
    $self->{verbose} = 0 if ( !defined($self->{verbose}) );

    #$self->{stages} = { 1=>'movies', 2=>'directors', 3=>'actors', 4=>'actresses', 5=>'genres', 6=>'ratings', 7=>'keywords', 8=>'plot' };
    #$self->{optionalStages} = { 'keywords' => 7, 'plot' => 8 }; # list of optional stages - no need to download files for these

    $self->{moviedbInfo}    = "$self->{imdbDir}/moviedb.info";
    $self->{moviedbOffline} = "$self->{imdbDir}/moviedb.offline";

    if ( defined($self->{listsDir}) ) {
        # NOTE(review): IMDB::Local::ListFiles is not loaded in the part of
        # the file visible here - confirm it is required elsewhere.
        $self->{listFiles} = IMDB::Local::ListFiles->new(listsDir => $self->{listsDir});
    }

    # only leave the progress bar on if it's available
    if ( !Have_bar ) {
        $self->{showProgressBar} = 0;
    }

    bless($self, $type);
    return($self);
}
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=head2 listTypes |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
Returns an array of list files supported (currently 'movies', 'directors', 'actors', 'actresses', 'genres', 'ratings', 'keywords', 'plot') |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=cut |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# Return whatever the object's {listFiles} helper reports from its types()
# method; the result is passed through unchanged in the caller's context.
sub listTypes($)
{
    my ($self) = @_;
    my $listFiles = $self->{listFiles};

    return $listFiles->types();
}
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
# Report an error message followed by a newline.
#
# When the object has a {logfd} handle the message is written there and
# {errorCountInLog} is incremented; otherwise the message goes to STDERR.
sub error($$)
{
    my ($self, $message) = @_;
    my $logfd = $self->{logfd};

    if ( !defined($logfd) ) {
        print STDERR $message."\n";
    }
    else {
        print {$logfd} $message."\n";
        $self->{errorCountInLog}++;
    }
}
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# Print a status message (plus newline) to STDERR, but only when the
# object's {verbose} attribute is true.
sub status($$)
{
    my ($self, $message) = @_;

    print STDERR $message."\n" if ( $self->{verbose} );
}
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
# Return a copy of the given value with a comma inserted before every group
# of three digits that runs to the end of the string (1234567 -> 1,234,567).
sub withThousands ($)
{
    my $number = shift;

    # the lookahead only succeeds when whole groups of three digits remain
    $number =~ s/(\d{1,3}?)(?=(\d{3})+$)/$1,/g;

    return $number;
}
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
use XMLTV::Gunzip; |
142
|
|
|
|
|
|
|
use IO::File; |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
# Open a list file for reading and return the handle (false on failure).
#
# Names ending in ".gz" are handed to gunzip_open(); anything else is opened
# with IO::File.  The read mode is passed as a separate argument so the file
# name is never parsed for mode characters or stripped of surrounding
# whitespace, which is what happened with the former "< $name" form.
sub openMaybeGunzip($)
{
    my ($file) = @_;

    return gunzip_open($file) if ( $file =~ m/\.gz$/ );
    return IO::File->new($file, '<');
}
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
# Counterpart to openMaybeGunzip().  Takes the file name and the handle but
# intentionally closes nothing: the original author reports that both
# gunzip_close() and a plain close() caused segfaults, so the handle is left
# open until that is understood.
sub closeMaybeGunzip($$)
{
    if ( $_[0]=~m/\.gz$/o ) {
        # Compressed input.  The intended call, disabled because of the
        # segfaults mentioned above:
        #
        #return gunzip_close($_[1]);
    }

    # Plain input.  Reported to segfault as well, so also disabled:
    #return close($_[1]);
}
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
# Convert a title into a searchtitle by lowercasing, |
167
|
|
|
|
|
|
|
# making it ASCII and removing punctuation. |
168
|
|
|
|
|
|
|
# |
169
|
|
|
|
|
|
|
sub MakeSearchtitle($;$;$) {
    my ($DB, $str, $debug) = @_;

    # 1. map to ASCII via CharsetMap() and lowercase the result
    my $ascii = lc( CharsetMap( $str, $debug ) );

    # 2. apply the punctuation rules, which are read through $DB
    my $stripped = RemovePunctuation($DB, $ascii, $debug);

    # 3. lowercase once more, since a replacement may introduce capitals
    return lc $stripped;
}
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
use Text::Unidecode; |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
# All characters outside the ASCII range (0x00-0x7F) are replaced by ASCII equivalents, |
178
|
|
|
|
|
|
|
# using function Text::Unidecode::unidecode |
179
|
|
|
|
|
|
|
# |
180
|
|
|
|
|
|
|
# Map a string to ASCII.
#
# Arguments:
#   $str   - the text to convert
#   $debug - optional; accepted because callers pass it, currently unused
#
# A few characters are replaced by hand first (the original author's notes
# on what unidecode would otherwise produce are kept in the table), then
# the remainder is handed to Text::Unidecode::unidecode.
#
# The prototype now admits the optional second argument that the sub has
# always unpacked and that MakeSearchtitle() passes.
sub CharsetMap($;$) {
    my ($str, $debug) = @_;

    # replacements that unidecode doesn't know about (or does wrong)
    my @overrides = (
        [ qr/\x{0133}/, 'ij'  ],    # 'ij' -> ij               ("" in unidecode)
        [ qr/\x{20ac}/, 'EUR' ],    # euro symbol -> EUR       (EU in unidecode)
        [ qr/\x{2122}/, 'TM'  ],    # trademark symbol -> TM   ("" in unidecode)
        [ qr/\x{a3}/,   'GBP' ],    # pound sign -> GBP        (PS in unidecode)
    );

    foreach my $override (@overrides) {
        my ($pattern, $replacement) = @{$override};
        $str =~ s/$pattern/$replacement/g;
    }

    # now do the real decode
    $str = unidecode($str);

    #print "[$str]\n" if ($debug);

    return ($str);
}
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
# Cache of punctuation rules; filled on the first RemovePunctuation() call
# that finds rows in the Punctuation table.
my @punctuation;

# Apply the replacement rules stored in the Punctuation table to a string.
#
# Arguments:
#   $DB    - object providing select2Matrix(); queried only while the rule
#            cache is still empty
#   $str   - the text to rewrite
#   $debug - optional, unused apart from the disabled trace statements
#
# Rules are read in priority order; each pattern is compiled
# case-insensitively and applied globally, one rule after the other.  Per the
# original author's note the rules strip punctuation and whitespace and turn
# '&' into 'and' - that depends on the table contents, not on this code.
#
# Returns the rewritten string.
sub RemovePunctuation($;$;$) {
    my ($DB, $str, $debug) = @_;

    unless ( @punctuation ) {
        my $rows = $DB->select2Matrix("select priority,pattern,replacement from Punctuation order by priority");

        foreach my $row ( @{$rows} ) {
            my (undef, $pattern, $replacement) = @{$row};

            push(@punctuation, {
                origpattern => $pattern,
                pattern     => qr/$pattern/i,
                replacement => $replacement,
            });
        }
    }

    foreach my $rule (@punctuation) {
        #print "[$str] $rule->{origpattern} " if ($debug);
        $str =~ s/$rule->{pattern}/$rule->{replacement}/g;
        #print "[$str]\n" if ($debug);
    }
    #print "<$str>\n--------\n" if ($debug);

    return ($str);
}
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
# Decode one key from the IMDb movies list into its components.
#
# Arguments:
#   $self    - the object; {seriesKeys} and {QualifierTypeIDs} are consulted,
#              and {seriesKeys} is updated when a series title is seen
#   $DB      - passed through to MakeSearchtitle()
#   $dbkey   - the key, e.g. "Studio One" (1948) {Twelve Angry Men (#7.1)}
#   $year    - optional; when defined it overrides the year found in the key
#   $titleID - id recorded in {seriesKeys} for series / mini-series keys
#
# Returns a hash reference with the keys parentId, series, episode,
# qualifier, dbkey, year, title and searchTitle.
sub decodeImdbKey($$$)
{
    my ($self, $DB, $dbkey, $year, $titleID)=@_;

    my %decoded=(parentId => 0, series => 0, episode => 0);

    # drop episode information - ex: "Studio One" (1948) {Twelve Angry Men (#7.1)}
    if ( $dbkey=~s/\s*\{([^\}]+)\}//o ) {
        my $episodeText=$1;

        if ( $episodeText=~s/\s*\(\#(\d+)\.(\d+)\)$// ) {
            @decoded{'series', 'episode'}=($1, $2);
            $decoded{title}=$episodeText;

            # attempt to locate parentId matching series title
            my $parentKey=$dbkey;
            $parentKey=~s/^\"//o;
            $parentKey=~s/\" \(/ \(/o;

            my $parentId=$self->{seriesKeys}->{$parentKey};
            $decoded{parentId}=$parentId if ( defined($parentId) );
        }
    }

    # todo - this would make things easier
    # change double-quotes around title to be (made-for-tv) suffix instead
    my $qualifierName;
    if ( $dbkey=~s/^\"//o && $dbkey=~s/\" \(/ \(/o) {
        # a quoted title is a series or mini-series, or an episode of one
        my ($seriesName, $episodeName)=( $dbkey=~s/\s+\(mini\)$//o )
            ? ("tv_mini_series", "episode_of_tv_mini_series")
            : ("tv_series",      "episode_of_tv_series");

        if ( $decoded{parentId} == 0 ) {
            # no parent found: remember this key so later episodes can find it
            $qualifierName=$seriesName;
            $self->{seriesKeys}->{$dbkey}=$titleID;
        }
        else {
            $qualifierName=$episodeName;
        }
    }
    elsif ( $dbkey=~s/\s+\(TV\)$//o ) {
        # how rude, some entries have (TV) appearing more than once.
        #$dbkey=~s/\s*\(TV\)$//o;
        $qualifierName="tv_movie";
    }
    elsif ( $dbkey=~s/\s+\(V\)$//o ) {
        $qualifierName="video_movie";
    }
    elsif ( $dbkey=~s/\s+\(VG\)$//o ) {
        $qualifierName="video_game";
    }
    else {
        $qualifierName="movie";
    }
    $decoded{qualifier}=$self->{QualifierTypeIDs}->{$qualifierName};

    $decoded{dbkey}=$dbkey;

    my $title=$dbkey;

    # todo - this is the wrong year for episode titles
    if ( $title=~m/^\"/o && $title=~m/\"\s*\(/o ) { #"
        $title=~s/^\"//o; #"
        $title=~s/\"(\s*\()/$1/o; #"
    }

    # strip the trailing "(YYYY)" / "(YYYY/II)" / "(????)" marker and note
    # which year, if any, the key itself carried
    my $yearInKey;
    if ( $title=~s/\s+\((\d\d\d\d)\)$//o ||
         $title=~s/\s+\((\d\d\d\d)\/[IVX]+\)$//o ) {
        $yearInKey=$1;
    }
    elsif ( $title=~s/\s+\((\?\?\?\?)\)$//o ||
            $title=~s/\s+\((\?\?\?\?)\/[IVX]+\)$//o ) {
        $yearInKey="";
    }
    else {
        $self->error("movie list format failed to decode year from title '$title'");
        $yearInKey="";
    }

    # over-ride with what is given
    $decoded{year}=defined($year) ? $year : $yearInKey;

    # move a trailing article back to the front ("Matrix, The" -> "The Matrix")
    $title=~s/(.*),\s*(The|A|Une|Las|Les|Los|L\'|Le|La|El|Das|De|Het|Een)$/$2 $1/og;

    if ( defined($decoded{title}) ) {
        # episode: title was set from the {...} part; search on series + year
        $decoded{searchTitle}=MakeSearchtitle($DB, $title."(".$decoded{year}.")", 0);

        # todo - is this more useful ?
        #$decoded{searchTitleWithYear}=MakeSearchtitle($DB, $title."(".$decoded{year}.")", 0);
    }
    else {
        $decoded{title}=$title;
        $decoded{searchTitle}=MakeSearchtitle($DB, $decoded{title}, 0);

        # todo - is this more useful ?
        #$decoded{searchTitleWithYear}=MakeSearchtitle($DB, $title."(".$decoded{year}.")", 0);
    }

    return(\%decoded);
}
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
# Import the IMDb movies list into the Titles table.
#
# Arguments:
#   $self          - the object; {QualifierTypeIDs} is (re)loaded from the
#                    database and {imdbMovie2DBKey} maps each list key to the
#                    id of the row inserted for it
#   $countEstimate - expected number of titles, used to size the progress bar
#   $file          - path of the list file (a ".gz" name is read via
#                    openMaybeGunzip)
#   $DB            - database object providing select2Hash() and prepare()
#
# Returns the number of titles inserted, -2 when the file cannot be opened,
# or -1 when the "MOVIES LIST" header is missing or malformed.
#
# Changes from the previous version:
#   - the year column is now matched with =~; it used to be written
#     "$ykey=m/.../", which matched $_ and overwrote $ykey with the match
#     result, so the "invalid year" warning printed an empty value and an
#     empty string was passed on as the year
#   - an unparseable year column now passes undef, so decodeImdbKey() falls
#     back to the year embedded in the title
#   - "my $progress = ... if COND" (undefined behaviour) is replaced by a
#     plain declaration followed by a conditional assignment
#   - lines are read into a lexical instead of clobbering the global $_
#   - the leftover debug warn() for "24 Hour Design" is gone
sub importMovies($$$$)
{
    my ($self, $countEstimate, $file, $DB)=@_;
    my $startTime=time();
    my $lineCount=0;

    my $fh = openMaybeGunzip($file) || return(-2);

    # skip everything up to and including the three-line header
    while ( defined(my $header = <$fh>) ) {
        $lineCount++;
        if ( $header=~m/^MOVIES LIST/ ) {
            my $underline=<$fh>;
            if ( !defined($underline) || $underline!~m/^===========/ ) {
                $self->error("missing ======= after 'MOVIES LIST' at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            my $blank=<$fh>;
            if ( !defined($blank) || $blank!~m/^\s*$/ ) {
                $self->error("missing empty line after ======= at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            last;
        }
        elsif ( $lineCount > 1000 ) {
            $self->error("$file: stopping at line $lineCount, didn't see \"MOVIES LIST\" line");
            closeMaybeGunzip($file, $fh);
            return(-1);
        }
    }

    my $progress;
    if ( $self->{showProgressBar} ) {
        $progress=Term::ProgressBar->new({name  => "importing Movies",
                                          count => $countEstimate,
                                          ETA   => 'linear'});
        $progress->minor(0);
        $progress->max_update_rate(1);
    }
    my $next_update=0;

    # preload qualifier types
    $self->{QualifierTypeIDs}=$DB->select2Hash("select Name, QualifierTypeID from QualifierTypes");

    #$DB->runSQL("BEGIN TRANSACTION");

    my $count=0;
    my $tableInsert_sth=$DB->prepare('INSERT INTO Titles VALUES (?,?,?,?,?,?,?,?)');

    while ( defined(my $line = <$fh>) ) {
        $lineCount++;
        #print "read line $lineCount:$line\n";

        # end is line consisting of only '-'
        last if ( $line=~m/^\-\-\-\-\-\-\-+/ );

        next if ( $line=~m/\{\{SUSPENDED\}\}/ );

        $line=~s/\n$//;

        my $tab=index($line, "\t");
        if ( $tab == -1 ) {
            $self->error("$file:$lineCount: unrecognized format (missing tab)");
            $next_update=$progress->update($count) if ($self->{showProgressBar});
            next;
        }

        my $mkey=substr($line, 0, $tab);

        # everything after the first tab; extra tabs may precede the year
        my $yearField=substr($line, $tab+1);
        my $ykey;
        if ( $yearField=~m/(?:^|\s)(\d\d\d\d)$/ ) {
            $ykey=$1;
        }
        elsif ( $yearField=~m/(?:^|\s)\?\?\?\?$/ ) {
            $ykey=undef;
        }
        elsif ( $yearField=~m/(?:^|\s)(\d\d\d\d)\-(?:\?\?\?\?|\d\d\d\d)$/ ) {
            # a range: keep the first year
            $ykey=$1;
        }
        else {
            warn("invalid year ($yearField) - $line");
            $ykey=undef;
        }

        # lets not import video games
        #if ( $decoded->{qualifier} != $self->{QualifierTypeIDs}->{'video_game'} ) {

        # returned count is number of titles found; it doubles as the row id
        $count++;

        my $decoded=$self->decodeImdbKey($DB, $mkey, $ykey, $count);

        $tableInsert_sth->execute($count,
                                  $decoded->{searchTitle},
                                  $decoded->{title},
                                  $decoded->{qualifier},
                                  $decoded->{year},
                                  $decoded->{parentId},
                                  $decoded->{series},
                                  $decoded->{episode});

        $self->{imdbMovie2DBKey}->{$mkey}=$count;

        #if ( ($count % 50000) == 0 ) {
        #$DB->commit();
        #}
        #}

        if ( $self->{showProgressBar} ) {
            # re-adjust target so progress bar doesn't seem too wonky
            if ( $count > $countEstimate ) {
                $countEstimate = $progress->target($count+1000);
                $next_update=$progress->update($count);
            }
            elsif ( $count > $next_update ) {
                $next_update=$progress->update($count);
            }
        }
    }
    #$DB->runSQL("END TRANSACTION");

    $progress->update($countEstimate) if ($self->{showProgressBar});

    $self->status(sprintf("importing Movies found ".withThousands($count)." titles in ".
                          withThousands($lineCount)." lines in %d seconds",time()-$startTime));

    closeMaybeGunzip($file, $fh);
    return($count);
}
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
# Import the IMDb genres list into Genres / Titles2Genres.
#
# Arguments:
#   $self          - the object; {imdbMovie2DBKey} (filled by importMovies)
#                    maps list keys to title ids, {GenreID} caches the id of
#                    each genre name, {QualifierTypeIDs} is reloaded
#   $countEstimate - expected number of rows, used to size the progress bar
#   $file          - path of the list file (a ".gz" name is read via
#                    openMaybeGunzip)
#   $DB            - database object providing select2Hash(), prepare(),
#                    insert_db() and commit()
#
# Lines whose key is not present in {imdbMovie2DBKey} are skipped silently.
# A commit is issued after every 50000 inserted rows.
#
# Returns the number of title/genre rows inserted, -2 when the file cannot
# be opened, or -1 when the "8: THE GENRES LIST" header is missing or
# malformed.
#
# Changes from the previous version:
#   - "my $progress = ... if COND" (undefined behaviour) is replaced by a
#     plain declaration followed by a conditional assignment
#   - lines are read into a lexical instead of clobbering the global $_
#   - the {{SUSPENDED}} substitution that could never run (it followed a
#     "next" on the same pattern) is gone
sub importGenres($$$$)
{
    my ($self, $countEstimate, $file, $DB)=@_;
    my $startTime=time();
    my $lineCount=0;

    my $fh = openMaybeGunzip($file) || return(-2);

    # skip everything up to and including the three-line header
    while ( defined(my $header = <$fh>) ) {
        $lineCount++;
        if ( $header=~m/^8: THE GENRES LIST/ ) {
            my $underline=<$fh>;
            if ( !defined($underline) || $underline!~m/^===========/ ) {
                $self->error("missing ======= after 'THE GENRES LIST' at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            my $blank=<$fh>;
            if ( !defined($blank) || $blank!~m/^\s*$/ ) {
                $self->error("missing empty line after ======= at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            last;
        }
        elsif ( $lineCount > 1000 ) {
            $self->error("$file: stopping at line $lineCount, didn't see \"THE GENRES LIST\" line");
            closeMaybeGunzip($file, $fh);
            return(-1);
        }
    }

    my $progress;
    if ( $self->{showProgressBar} ) {
        $progress=Term::ProgressBar->new({name  => "importing Genres",
                                          count => $countEstimate,
                                          ETA   => 'linear'});
        $progress->minor(0);
        $progress->max_update_rate(1);
    }
    my $next_update=0;

    # preload qualifier types
    $self->{QualifierTypeIDs}=$DB->select2Hash("select Name, QualifierTypeID from QualifierTypes");

    #$DB->runSQL("BEGIN TRANSACTION");

    my $count=0;
    my $tableInsert_sth=$DB->prepare('INSERT INTO Titles2Genres VALUES (?,?)');

    while ( defined(my $line = <$fh>) ) {
        $lineCount++;
        #print "read line $lineCount:$line\n";

        # end is line consisting of only '-'
        last if ( $line=~m/^\-\-\-\-\-\-\-+/ );

        $line=~s/\n$//;

        my $tab=index($line, "\t");
        if ( $tab == -1 ) {
            $self->error("$file:$lineCount: unrecognized format (missing tab)");
            $next_update=$progress->update($count) if ($self->{showProgressBar});
            next;
        }

        my $mkey=substr($line, 0, $tab);

        # ignore {{SUSPENDED}} entries altogether
        next if ($mkey=~m/\s*\{\{SUSPENDED\}\}/);

        # ignore {Twelve Angry Men (1954)}
        # TODO - do we want this ?
        #$mkey=~s/\s*\{[^\}]+\}//go;

        # skip entries that have {} in them since they're tv episodes
        #next if ( $mkey=~s/\s*\{[^\}]+\}$//o );

        # genres sometimes has more than one tab
        my $genre=substr($line, $tab);
        $genre=~s/^\t+//;

        my $titleID=$self->{imdbMovie2DBKey}->{$mkey};
        if ( $titleID ) {
            # insert into db as discovered
            if ( ! defined($self->{GenreID}->{$genre}) ) {
                $self->{GenreID}->{$genre}=$DB->insert_db('Genres', 'GenreID', Name=>$genre);
            }
            $tableInsert_sth->execute($titleID, $self->{GenreID}->{$genre});

            # returned count is number of titles found
            $count++;

            if ( ($count % 50000) == 0 ) {
                $DB->commit();
            }
        }

        if ( $self->{showProgressBar} ) {
            # re-adjust target so progress bar doesn't seem too wonky
            if ( $count > $countEstimate ) {
                $countEstimate = $progress->target($count+1000);
                $next_update=$progress->update($count);
            }
            elsif ( $count > $next_update ) {
                $next_update=$progress->update($count);
            }
        }
    }
    #$DB->runSQL("END TRANSACTION");

    $progress->update($countEstimate) if ($self->{showProgressBar});

    $self->status(sprintf("importing Genres found ".withThousands($count)." titles in ".
                          withThousands($lineCount)." lines in %d seconds",time()-$startTime));

    closeMaybeGunzip($file, $fh);
    return($count);
}
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
sub importActors($$$$) |
634
|
|
|
|
|
|
|
{ |
635
|
|
|
|
|
|
|
my ($self, $whichCastOrDirector, $castCountEstimate, $file, $DB)=@_; |
636
|
|
|
|
|
|
|
my $startTime=time(); |
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
if ( $whichCastOrDirector eq "Actors" ) { |
639
|
|
|
|
|
|
|
if ( $DB->table_row_count('Actors') > 0 || |
640
|
|
|
|
|
|
|
$DB->table_row_count('Titles2Actors') > 0 ) { |
641
|
|
|
|
|
|
|
$self->status("clearing previously loaded data.."); |
642
|
|
|
|
|
|
|
$DB->runSQL("DELETE from Actors"); |
643
|
|
|
|
|
|
|
$DB->runSQL("DELETE from Titles2Actors"); |
644
|
|
|
|
|
|
|
} |
645
|
|
|
|
|
|
|
} |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
my $header; |
648
|
|
|
|
|
|
|
my $whatAreWeParsing; |
649
|
|
|
|
|
|
|
my $lineCount=0; |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
if ( $whichCastOrDirector eq "Actors" ) { |
652
|
|
|
|
|
|
|
$header="THE ACTORS LIST"; |
653
|
|
|
|
|
|
|
$whatAreWeParsing=1; |
654
|
|
|
|
|
|
|
} |
655
|
|
|
|
|
|
|
elsif ( $whichCastOrDirector eq "Actresses" ) { |
656
|
|
|
|
|
|
|
$header="THE ACTRESSES LIST"; |
657
|
|
|
|
|
|
|
$whatAreWeParsing=2; |
658
|
|
|
|
|
|
|
} |
659
|
|
|
|
|
|
|
else { |
660
|
|
|
|
|
|
|
die "why are we here ?"; |
661
|
|
|
|
|
|
|
} |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
my $fh = openMaybeGunzip($file) || return(-2); |
664
|
|
|
|
|
|
|
my $progress=Term::ProgressBar->new({name => "importing $whichCastOrDirector", |
665
|
|
|
|
|
|
|
count => $castCountEstimate, |
666
|
|
|
|
|
|
|
ETA => 'linear'}) |
667
|
|
|
|
|
|
|
if ($self->{showProgressBar}); |
668
|
|
|
|
|
|
|
$progress->minor(0) if ($self->{showProgressBar}); |
669
|
|
|
|
|
|
|
$progress->max_update_rate(1) if ($self->{showProgressBar}); |
670
|
|
|
|
|
|
|
my $next_update=0; |
671
|
|
|
|
|
|
|
while(<$fh>) { |
672
|
|
|
|
|
|
|
$lineCount++; |
673
|
|
|
|
|
|
|
if ( m/^$header/ ) { |
674
|
|
|
|
|
|
|
if ( !($_=<$fh>) || !m/^===========/o ) { |
675
|
|
|
|
|
|
|
$self->error("missing ======= after $header at line $lineCount"); |
676
|
|
|
|
|
|
|
closeMaybeGunzip($file, $fh); |
677
|
|
|
|
|
|
|
return(-1); |
678
|
|
|
|
|
|
|
} |
679
|
|
|
|
|
|
|
if ( !($_=<$fh>) || !m/^\s*$/o ) { |
680
|
|
|
|
|
|
|
$self->error("missing empty line after ======= at line $lineCount"); |
681
|
|
|
|
|
|
|
closeMaybeGunzip($file, $fh); |
682
|
|
|
|
|
|
|
return(-1); |
683
|
|
|
|
|
|
|
} |
684
|
|
|
|
|
|
|
if ( !($_=<$fh>) || !m/^Name\s+Titles\s*$/o ) { |
685
|
|
|
|
|
|
|
$self->error("missing name/titles line after ======= at line $lineCount"); |
686
|
|
|
|
|
|
|
closeMaybeGunzip($file, $fh); |
687
|
|
|
|
|
|
|
return(-1); |
688
|
|
|
|
|
|
|
} |
689
|
|
|
|
|
|
|
if ( !($_=<$fh>) || !m/^[\s\-]+$/o ) { |
690
|
|
|
|
|
|
|
$self->error("missing name/titles suffix line after ======= at line $lineCount"); |
691
|
|
|
|
|
|
|
closeMaybeGunzip($file, $fh); |
692
|
|
|
|
|
|
|
return(-1); |
693
|
|
|
|
|
|
|
} |
694
|
|
|
|
|
|
|
last; |
695
|
|
|
|
|
|
|
} |
696
|
|
|
|
|
|
|
elsif ( $lineCount > 1000 ) { |
697
|
|
|
|
|
|
|
$self->error("$file: stopping at line $lineCount, didn't see \"$header\" line"); |
698
|
|
|
|
|
|
|
closeMaybeGunzip($file, $fh); |
699
|
|
|
|
|
|
|
return(-1); |
700
|
|
|
|
|
|
|
} |
701
|
|
|
|
|
|
|
} |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
my $cur_name; |
704
|
|
|
|
|
|
|
my $count=0; |
705
|
|
|
|
|
|
|
my $castNames=0; |
706
|
|
|
|
|
|
|
my $tableInsert_sth=$DB->prepare('INSERT INTO Actors VALUES (?,?,?)'); |
707
|
|
|
|
|
|
|
my $tableInsert_sth2=$DB->prepare('INSERT INTO Titles2Actors VALUES (?,?,?,?,?)'); |
708
|
|
|
|
|
|
|
my $cur_actorId=$DB->select2Scalar('select count(1) from Actors'); |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
while(<$fh>) { |
711
|
|
|
|
|
|
|
$lineCount++; |
712
|
|
|
|
|
|
|
my $line=$_; |
713
|
|
|
|
|
|
|
$line=~s/\n$//o; |
714
|
|
|
|
|
|
|
#$self->status("read line $lineCount:$line"); |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
# end is line consisting of only '-' |
717
|
|
|
|
|
|
|
last if ( $line=~m/^\-\-\-\-\-\-\-+/o ); |
718
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
next if ( length($line) == 0 ); |
720
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
# try ignoring these |
722
|
|
|
|
|
|
|
next if ($line=~m/\s*\{\{SUSPENDED\}\}/o); |
723
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
my $billing=0; |
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
# actors or actresses |
727
|
|
|
|
|
|
|
if ( $line=~s/\s*<(\d+)>//o ) { |
728
|
|
|
|
|
|
|
$billing=int($1); |
729
|
|
|
|
|
|
|
next if ( $billing >3 ); |
730
|
|
|
|
|
|
|
} |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
if ( $line=~s/^([^\t]+)\t+//o ) { |
733
|
|
|
|
|
|
|
$cur_name=$1; |
734
|
|
|
|
|
|
|
$castNames++; |
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
$cur_actorId++; |
737
|
|
|
|
|
|
|
$tableInsert_sth->execute($cur_actorId, MakeSearchtitle($DB, $cur_name, 0), $cur_name); |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
if ( $self->{showProgressBar} ) { |
740
|
|
|
|
|
|
|
# re-adjust target so progress bar doesn't seem too wonky |
741
|
|
|
|
|
|
|
if ( $castNames > $castCountEstimate ) { |
742
|
|
|
|
|
|
|
$castCountEstimate = $progress->target($castNames+100); |
743
|
|
|
|
|
|
|
$next_update=$progress->update($castNames); |
744
|
|
|
|
|
|
|
} |
745
|
|
|
|
|
|
|
elsif ( $castNames > $next_update ) { |
746
|
|
|
|
|
|
|
$next_update=$progress->update($castNames); |
747
|
|
|
|
|
|
|
} |
748
|
|
|
|
|
|
|
} |
749
|
|
|
|
|
|
|
} |
750
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
my $isHost=0; |
752
|
|
|
|
|
|
|
my $isNarrator=0; |
753
|
|
|
|
|
|
|
if ( (my $start=index($line, " [")) != -1 ) { |
754
|
|
|
|
|
|
|
#my $end=rindex($line, "]"); |
755
|
|
|
|
|
|
|
my $ex=substr($line, $start+1); |
756
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
if ( $ex=~s/Host//o ) { |
758
|
|
|
|
|
|
|
$isHost=1; |
759
|
|
|
|
|
|
|
} |
760
|
|
|
|
|
|
|
if ( $ex=~s/Narrator//o ) { |
761
|
|
|
|
|
|
|
$isNarrator=1; |
762
|
|
|
|
|
|
|
} |
763
|
|
|
|
|
|
|
$line=substr($line, 0, $start); |
764
|
|
|
|
|
|
|
# ignore character name |
765
|
|
|
|
|
|
|
} |
766
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
# BUG |
768
|
|
|
|
|
|
|
# [honir] this is wrong - this puts cast from all the episodes as though they are in the entire series! |
769
|
|
|
|
|
|
|
# ##ignore {Twelve Angry Men (1954)} |
770
|
|
|
|
|
|
|
# TODO - I REMOVED THIS, should we ? |
771
|
|
|
|
|
|
|
#$line=~s/\s*\{[^\}]+\}//o; |
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
if ( $line=~s/\s*\(aka ([^\)]+)\).*$//o ) { |
774
|
|
|
|
|
|
|
#$attrs=$1; |
775
|
|
|
|
|
|
|
} |
776
|
|
|
|
|
|
|
# TODO - what are we ignoring here ? |
777
|
|
|
|
|
|
|
if ( $line=~s/ (\(.*)$//o ) { |
778
|
|
|
|
|
|
|
#$attrs=$1; |
779
|
|
|
|
|
|
|
} |
780
|
|
|
|
|
|
|
$line=~s/^\s+//og; |
781
|
|
|
|
|
|
|
$line=~s/\s+$//og; |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
if ( $line=~s/\s+Narrator$//o ) { |
784
|
|
|
|
|
|
|
# TODO - do we want to store this ? Does it actually occur ? |
785
|
|
|
|
|
|
|
# ignore |
786
|
|
|
|
|
|
|
} |
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
#if ( $line=~s/\s*\([A-Z]+\)$//o ) { |
789
|
|
|
|
|
|
|
#} |
790
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
if ( $self->{imdbMovie2DBKey}->{$line} ) { |
792
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
$tableInsert_sth2->execute($self->{imdbMovie2DBKey}->{$line}, $cur_actorId, $billing, $isHost, $isNarrator); |
794
|
|
|
|
|
|
|
$count++; |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
if ( ($count % 50000) == 0 ) { |
797
|
|
|
|
|
|
|
$DB->commit(); |
798
|
|
|
|
|
|
|
} |
799
|
|
|
|
|
|
|
} |
800
|
|
|
|
|
|
|
else { |
801
|
|
|
|
|
|
|
#warn($line); |
802
|
|
|
|
|
|
|
} |
803
|
|
|
|
|
|
|
} |
804
|
|
|
|
|
|
|
$progress->update($castCountEstimate) if ($self->{showProgressBar}); |
805
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
$self->status(sprintf("importing $whichCastOrDirector found ".withThousands($castNames)." names, ". |
807
|
|
|
|
|
|
|
withThousands($count)." titles in ".withThousands($lineCount)." lines in %d seconds",time()-$startTime)); |
808
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
closeMaybeGunzip($file, $fh); |
810
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
return($castNames); |
812
|
|
|
|
|
|
|
} |
813
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
# Import the IMDb directors list into the Directors and Titles2Directors tables.
#
# Called as a method: $self->importDirectors($castCountEstimate, $file, $DB)
#   $castCountEstimate  expected number of names; only used to size the progress bar
#   $file               list file path, opened through openMaybeGunzip()
#   $DB                 database object (table_row_count/runSQL/prepare/commit)
#
# Returns the number of name entries read from the file, -2 when the file
# cannot be opened, or -1 when the "THE DIRECTORS LIST" header block is
# missing or malformed.
sub importDirectors($$$)
{
    my ($self, $castCountEstimate, $file, $DB)=@_;
    my $startTime=time();

    my $lineCount=0;

    if ( $DB->table_row_count('Directors') > 0 ||
         $DB->table_row_count('Titles2Directors') > 0 ) {
        $self->status("clearing previously loaded data..");
        $DB->runSQL("DELETE from Directors");
        $DB->runSQL("DELETE from Titles2Directors");
    }

    my $fh = openMaybeGunzip($file) || return(-2);

    # declared on its own: "my $x = ... if COND" has undefined behaviour
    my $progress;
    if ( $self->{showProgressBar} ) {
        $progress=Term::ProgressBar->new({name  => "importing Directors",
                                          count => $castCountEstimate,
                                          ETA   => 'linear'});
        $progress->minor(0);
        $progress->max_update_rate(1);
    }
    my $next_update=0;

    # skip the preamble up to (and including) the four-line list header
    while(<$fh>) {
        $lineCount++;
        if ( m/^THE DIRECTORS LIST/ ) {
            if ( !($_=<$fh>) || !m/^===========/o ) {
                $self->error("missing ======= after THE DIRECTORS LIST at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            if ( !($_=<$fh>) || !m/^\s*$/o ) {
                $self->error("missing empty line after ======= at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            if ( !($_=<$fh>) || !m/^Name\s+Titles\s*$/o ) {
                $self->error("missing name/titles line after ======= at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            if ( !($_=<$fh>) || !m/^[\s\-]+$/o ) {
                $self->error("missing name/titles suffix line after ======= at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            last;
        }
        elsif ( $lineCount > 1000 ) {
            $self->error("$file: stopping at line $lineCount, didn't see \"THE DIRECTORS LIST\" line");
            closeMaybeGunzip($file, $fh);
            return(-1);
        }
    }

    my $cur_name;
    my $count=0;
    my $castNames=0;
    my %found;             # director name => DirectorID already inserted
    my $directorCount=0;

    my $tableInsert_sth=$DB->prepare('INSERT INTO Directors VALUES (?,?,?)');
    my $tableInsert_sth2=$DB->prepare('INSERT INTO Titles2Directors VALUES (?,?)');
    while(<$fh>) {
        $lineCount++;
        my $line=$_;
        $line=~s/\n$//o;
        #$self->status("read line $lineCount:$line");

        # end is line consisting of only '-'
        last if ( $line=~m/^\-\-\-\-\-\-\-+/o );

        next if ( length($line) == 0 );

        # a line starting with a name (text up to the first tab) begins a new
        # person; continuation lines for the same person start with tabs
        if ( $line=~s/^([^\t]+)\t+//o ) {
            $cur_name=$1;
            $castNames++;

            if ( $self->{showProgressBar} ) {
                # re-adjust target so progress bar doesn't seem too wonky
                if ( $castNames > $castCountEstimate ) {
                    $castCountEstimate = $progress->target($castNames+100);
                    $next_update=$progress->update($castNames);
                }
                elsif ( $castNames > $next_update ) {
                    $next_update=$progress->update($castNames);
                }
            }
        }

        # try ignoring these
        next if ($line=~m/\s*\{\{SUSPENDED\}\}/o);

        # BUG
        # [honir] this is wrong - this puts cast from all the episodes as though they are in the entire series!
        # ##ignore {Twelve Angry Men (1954)}
        $line=~s/\s*\{[^\}]+\}//o;

        # TODO - what are we ignoring here ?
        if ( $line=~s/ (\(.*)$//o ) {
            # $attrs=$1;
        }
        $line=~s/^\s+//og;
        $line=~s/\s+$//og;

        if ( $self->{imdbMovie2DBKey}->{$line} ) {

            if ( !defined($found{$cur_name}) ) {
                # pre-increment so the ID remembered for the link table is the
                # same ID the Directors row is inserted with (the previous
                # post-increment remembered N-1 but inserted N)
                $found{$cur_name}=++$directorCount;
                $tableInsert_sth->execute($directorCount, MakeSearchtitle($DB, $cur_name, 0), $cur_name);
            }

            $tableInsert_sth2->execute($self->{imdbMovie2DBKey}->{$line}, $found{$cur_name});
            $count++;
            if ( ($count % 50000) == 0 ) {
                $DB->commit();
            }
        }
    }
    $progress->update($castCountEstimate) if ($self->{showProgressBar});

    $self->status(sprintf("importing Directors found ".withThousands($castNames)." names, ".
                          withThousands($count)." titles in ".withThousands($lineCount)." lines in %d seconds",time()-$startTime));

    closeMaybeGunzip($file, $fh);

    return($castNames);
}
941
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
# Import the IMDb ratings list into the Ratings table.
#
# Called as a method: $self->importRatings($countEstimate, $file, $DB)
#   $countEstimate  expected number of entries; only used to size the progress bar
#   $file           list file path, opened through openMaybeGunzip()
#   $DB             database object (table_row_count/runSQL/prepare/commit)
#
# Returns the number of rating lines recognised, -2 when the file cannot be
# opened, or -1 when the "MOVIE RATINGS REPORT" header is missing or malformed.
# Only titles present in $self->{imdbMovie2DBKey} are written to the table.
sub importRatings($$)
{
    my ($self, $countEstimate, $file, $DB)=@_;
    my $startTime=time();
    my $lineCount=0;

    if ( $DB->table_row_count('Ratings') > 0 ) {
        $self->status("clearing previously loaded data..");
        $DB->runSQL("DELETE from Ratings");
    }

    my $fh = openMaybeGunzip($file) || return(-2);

    # skip the preamble up to (and including) the column-header line
    while(<$fh>) {
        $lineCount++;
        if ( m/^MOVIE RATINGS REPORT/o ) {
            if ( !($_=<$fh>) || !m/^\s*$/o) {
                $self->error("missing empty line after \"MOVIE RATINGS REPORT\" at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            # \s+ between the column names so the amount of padding does not matter
            if ( !($_=<$fh>) || !m/^New\s+Distribution\s+Votes\s+Rank\s+Title/o ) {
                $self->error("missing \"New Distribution Votes Rank Title\" at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            last;
        }
        elsif ( $lineCount > 1000 ) {
            $self->error("$file: stopping at line $lineCount, didn't see \"MOVIE RATINGS REPORT\" line");
            closeMaybeGunzip($file, $fh);
            return(-1);
        }
    }

    # declared on its own: "my $x = ... if COND" has undefined behaviour
    my $progress;
    if ( $self->{showProgressBar} ) {
        $progress=Term::ProgressBar->new({name  => "importing Ratings",
                                          count => $countEstimate,
                                          ETA   => 'linear'});
        $progress->minor(0);
        $progress->max_update_rate(1);
    }
    my $next_update=0;

    my $countImported=0;
    my $count=0;
    my $tableInsert_sth=$DB->prepare('INSERT INTO Ratings VALUES (?,?,?,?)');
    while(<$fh>) {
        $lineCount++;
        my $line=$_;
        #print "read line $lineCount:$line";

        $line=~s/\n$//o;

        # skip empty lines (only really appear right before last line ending with ----
        next if ( $line=~m/^\s*$/o );
        # end is line consisting of only '-'
        last if ( $line=~m/^\-\-\-\-\-\-\-+/o );

        # e.g. New Distribution Votes Rank Title
        #      0000000133 225568 8.9 12 Angry Men (1957)
        if ( $line=~m/^\s+([\.|\*|\d]+)\s+(\d+)\s+(\d+\.\d+)\s+(.+)$/o ) {
            # copy the captures straight away, before anything can clobber them
            my ($distribution, $votes, $rank, $title)=($1, $2, $3, $4);

            my $titleId=$self->{imdbMovie2DBKey}->{$title};
            if ( $titleId ) {
                $tableInsert_sth->execute($titleId, $distribution, $votes, $rank);
                $countImported++;
                if ( ($countImported % 50000) == 0 ) {
                    $DB->commit();
                }
            }

            $count++;

            if ( $self->{showProgressBar} ) {
                # re-adjust target so progress bar doesn't seem too wonky
                if ( $count > $countEstimate ) {
                    $countEstimate = $progress->target($count+1000);
                    $next_update=$progress->update($count);
                }
                elsif ( $count > $next_update ) {
                    $next_update=$progress->update($count);
                }
            }
        }
        else {
            # this branch belongs to the line-format match above; it used to
            # hang off the showProgressBar test because of a misplaced brace
            $self->error("$file:$lineCount: unrecognized format");
            $next_update=$progress->update($count) if ($self->{showProgressBar});
        }
    }
    $progress->update($countEstimate) if ($self->{showProgressBar});

    $self->status(sprintf("importing Ratings found ".withThousands($count)." titles in ".
                          withThousands($lineCount)." lines in %d seconds",time()-$startTime));

    closeMaybeGunzip($file, $fh);
    return($count);
}
1039
|
|
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
# Import the IMDb keywords list into the Keywords and Titles2Keywords tables.
#
# Called as a method: $self->importKeywords($countEstimate, $file, $DB)
#   $countEstimate  expected number of entries; only used to size the progress bar
#   $file           list file path, opened through openMaybeGunzip()
#   $DB             database object (table_row_count/runSQL/prepare/commit)
#
# Returns the number of "title keyword" lines recognised, -2 when the file
# cannot be opened, or -1 when the "THE KEYWORDS LIST" header is missing or
# malformed. Only titles present in $self->{imdbMovie2DBKey} are stored.
sub importKeywords($$$$)
{
    my ($self, $countEstimate, $file, $DB)=@_;
    my $startTime=time();
    my $lineCount=0;

    # clear both tables, otherwise a re-import leaves stale title/keyword
    # links behind that point at freshly renumbered keyword IDs
    if ( $DB->table_row_count('Keywords') > 0 ||
         $DB->table_row_count('Titles2Keywords') > 0 ) {
        $self->status("clearing previously loaded data..");
        $DB->runSQL("DELETE from Keywords");
        $DB->runSQL("DELETE from Titles2Keywords");
    }

    my $fh = openMaybeGunzip($file) || return(-2);

    # skip the preamble up to (and including) the list header
    while(<$fh>) {
        $lineCount++;

        if ( m/THE KEYWORDS LIST/ ) {
            if ( !($_=<$fh>) || !m/^===========/o ) {
                $self->error("missing ======= after \"THE KEYWORDS LIST\" at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            if ( !($_=<$fh>) || !m/^\s*$/o ) {
                $self->error("missing empty line after ======= at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            last;
        }
        elsif ( $lineCount > 70000 ) {
            $self->error("$file: stopping at line $lineCount, didn't see \"THE KEYWORDS LIST\" line");
            closeMaybeGunzip($file, $fh);
            return(-1);
        }
    }

    # declared on its own: "my $x = ... if COND" has undefined behaviour
    my $progress;
    if ( $self->{showProgressBar} ) {
        $progress=Term::ProgressBar->new({name  => "importing Keywords",
                                          count => $countEstimate,
                                          ETA   => 'linear'});
        $progress->minor(0);
        $progress->max_update_rate(1);
    }
    my $next_update=0;

    my $count=0;
    my $countImported=0;
    my %found;             # keyword => KeywordID already inserted
    my $tableInsert_sth=$DB->prepare('INSERT INTO Keywords VALUES (?,?)');
    my $tableInsert_sth2=$DB->prepare('INSERT INTO Titles2Keywords VALUES (?,?)');
    my $keywordCount=0;
    while(<$fh>) {
        $lineCount++;
        my $line=$_;
        chomp($line);
        next if ($line =~ m/^\s*$/);

        # the keyword is the last whitespace-delimited word, the title is the rest
        my ($title, $keyword) = ($line =~ m/^(.*)\s+(\S+)\s*$/);
        if ( defined($title) and defined($keyword) ) {

            # note: episode entries (e.g. "{Doctor Who (#10.22)}") are not
            # filtered out here; they are simply looked up like any other title
            if ( $self->{imdbMovie2DBKey}->{$title} ) {
                if ( !defined($found{$keyword}) ) {
                    # pre-increment so the ID remembered for the link table is
                    # the same ID the Keywords row is inserted with (the
                    # previous post-increment remembered N-1 but inserted N)
                    $found{$keyword}=++$keywordCount;
                    $tableInsert_sth->execute($keywordCount, $keyword);
                }
                $tableInsert_sth2->execute($self->{imdbMovie2DBKey}->{$title}, $found{$keyword});

                $countImported++;
                if ( ($countImported % 50000) == 0 ) {
                    $DB->commit();
                }
            }
            $count++;

            if ( $self->{showProgressBar} ) {
                # re-adjust target so progress bar doesn't seem too wonky
                if ( $count > $countEstimate ) {
                    $countEstimate = $progress->target($count+1000);
                    $next_update=$progress->update($count);
                }
                elsif ( $count > $next_update ) {
                    $next_update=$progress->update($count);
                }
            }
        } else {
            #$self->error("$file:$lineCount: unrecognized format \"$line\"");
            $next_update=$progress->update($count) if ($self->{showProgressBar});
        }

    }
    $progress->update($countEstimate) if ($self->{showProgressBar});

    $self->status(sprintf("importing Keywords found ".withThousands($count)." titles in ".
                          withThousands($lineCount)." lines in %d seconds",time()-$startTime));

    closeMaybeGunzip($file, $fh);
    return($count);
}
1143
|
|
|
|
|
|
|
|
1144
|
|
|
|
|
|
|
# Import the IMDb plot summaries list into the Plots table.
#
# Called as a method: $self->importPlots($countEstimate, $file, $DB)
#   $countEstimate  expected number of entries; only used to size the progress bar
#   $file           list file path, opened through openMaybeGunzip()
#   $DB             database object (table_row_count/runSQL/prepare/commit)
#
# Returns the number of plots stored, -2 when the file cannot be opened, or
# -1 when the "PLOT SUMMARIES LIST" header is missing or malformed. For each
# "MV:" entry only the "PL:" lines up to the first "BY:" line are kept.
sub importPlots($$$$)
{
    my ($self, $countEstimate, $file, $DB)=@_;
    my $startTime=time();
    my $lineCount=0;

    if ( $DB->table_row_count('Plots') > 0 ) {
        $self->status("clearing previously loaded data..");
        $DB->runSQL("DELETE from Plots");
    }

    my $fh = openMaybeGunzip($file) || return(-2);

    # skip the preamble up to (and including) the list header
    while(<$fh>) {
        $lineCount++;

        if ( m/PLOT SUMMARIES LIST/ ) {
            if ( !($_=<$fh>) || !m/^===========/o ) {
                $self->error("missing ======= after \"PLOT SUMMARIES LIST\" at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            if ( !($_=<$fh>) || !m/^-----------/o ) {
                $self->error("missing ------- line after ======= at line $lineCount");
                closeMaybeGunzip($file, $fh);
                return(-1);
            }
            last;
        }
        elsif ( $lineCount > 500 ) {
            $self->error("$file: stopping at line $lineCount, didn't see \"PLOT SUMMARIES LIST\" line");
            closeMaybeGunzip($file, $fh);
            return(-1);
        }
    }

    # declared on its own: "my $x = ... if COND" has undefined behaviour
    my $progress;
    if ( $self->{showProgressBar} ) {
        $progress=Term::ProgressBar->new({name  => "importing Plots",
                                          count => $countEstimate,
                                          ETA   => 'linear'});
        $progress->minor(0);
        $progress->max_update_rate(1);
    }
    my $next_update=0;

    my $count=0;
    my $tableInsert_sth=$DB->prepare('INSERT INTO Plots VALUES (?,?)');
    while(<$fh>) {
        $lineCount++;
        my $line=$_;
        chomp($line);
        next if ($line =~ m/^\s*$/);
        next if ($line=~m/\s*\{\{SUSPENDED\}\}/o);

        # only used to recognise an "MV:" entry line; the lookup key below is
        # the whole line minus the "MV:" prefix
        my ($title) = ($line =~ m/^MV:\s(.*?)\s?(\{.*\})?$/);
        if ( defined($title) ) {

            $line =~s/^MV:\s*//;

            my $plot = '';
          PLOT_LINE:
            while (1) {
                my $plotLine=<$fh>;
                # defined() rather than truth, so a final "0" line is not mistaken for EOF
                last PLOT_LINE if ( !defined($plotLine) );

                $lineCount++;
                chomp($plotLine);
                next PLOT_LINE if ($plotLine =~ m/^\s*$/);
                if ( $plotLine =~ m/PL:\s(.*)$/ ) {      # plot summary is a number of lines starting "PL:"
                    $plot .= ($plot ne ''?' ':'') . $1;
                }
                last PLOT_LINE if ( $plotLine =~ m/BY:\s(.*)$/ );   # the author line "BY:" signals the end of the plot summary
            }

            if ( $self->{imdbMovie2DBKey}->{$line} ) {
                $tableInsert_sth->execute($self->{imdbMovie2DBKey}->{$line}, $plot);
                $count++;
                if ( ($count % 50000) == 0 ) {
                    $DB->commit();
                }
            }

            if ( $self->{showProgressBar} ) {
                # re-adjust target so progress bar doesn't seem too wonky
                if ( $count > $countEstimate ) {
                    $countEstimate = $progress->target($count+1000);
                    $next_update=$progress->update($count);
                }
                elsif ( $count > $next_update ) {
                    $next_update=$progress->update($count);
                }
            }
        } else {
            # skip lines up to the next "MV:" (further PL:/BY: lines of an
            # entry whose first summary was already consumed, and separators)
            if ($line !~ m/^(---|PL:|BY:)/ ) {
                $self->error("$file:$lineCount: unrecognized format \"$line\"");
            }
            $next_update=$progress->update($count) if ($self->{showProgressBar});
        }
    }
    $progress->update($countEstimate) if ($self->{showProgressBar});

    # the count used to be printed twice (raw and with thousands separators)
    $self->status(sprintf("importing Plots found ".withThousands($count)." plots in ".
                          withThousands($lineCount)." lines in %d seconds",time()-$startTime));

    closeMaybeGunzip($file, $fh);
    return($count);
}
1255
|
|
|
|
|
|
|
|
1256
|
|
|
|
|
|
|
# Read a "key:value"-per-line info file into a hash.
#
#   $file  path of the info file
#
# Returns a hash reference of the parsed pairs (undef when the file holds no
# "key:value" line), or an error message string when the file cannot be
# opened. Callers tell the two apart by checking for a HASH reference.
sub loadDBInfo($)
{
    my $file=shift;
    my $info;

    open(my $fh, '<', $file) || return("imdbDir index file \"$file\":$!");
    while(my $line=<$fh>) {
        # chomp, not chop: the last line may lack a trailing newline
        chomp($line);
        # the key is everything before the first ':', the value is the rest
        if ( $line=~s/^([^:]+)://o ) {
            $info->{$1}=$line;
        }
    }
    close($fh);
    return($info);
}
1271
|
|
|
|
|
|
|
|
1272
|
|
|
|
|
|
|
# Load the info file named by $self->{moviedbInfo} into $self->{dbinfo}.
#
# Returns undef when the data was loaded, 1 when loadDBInfo() did not hand
# back a hash reference (in which case $self->{dbinfo} is left untouched).
sub dbinfoLoad($)
{
    my ($self)=@_;

    my $loaded=loadDBInfo($self->{moviedbInfo});
    return(1) if ( ref $loaded ne 'HASH' );

    $self->{dbinfo}=$loaded;
    return(undef);
}
1283
|
|
|
|
|
|
|
|
1284
|
|
|
|
|
|
|
# Set (or overwrite) one entry of the in-memory dbinfo hash.
#
#   $key    entry name
#   $value  value to store
#
# Evaluates to the stored value.
sub dbinfoAdd($$$)
{
    my $self=shift;
    my ($name, $setting)=@_;

    return($self->{dbinfo}->{$name}=$setting);
}
1289
|
|
|
|
|
|
|
|
1290
|
|
|
|
|
|
|
# Look up one entry of the in-memory dbinfo hash.
#
#   $key           entry name
#   $defaultValue  returned when the entry is missing or undefined
#
# Defined-but-false values (0, '') are returned as stored.
sub dbinfoGet($$$)
{
    my ($self, $key, $defaultValue)=@_;

    my $stored=$self->{dbinfo}->{$key};
    return( defined($stored) ? $stored : $defaultValue );
}
1298
|
|
|
|
|
|
|
|
1299
|
|
|
|
|
|
|
# Write $self->{dbinfo} to the file named by $self->{moviedbInfo}, one
# "key:value" line per entry, keys in sorted order.
#
# Returns 0 on success, 1 when the file cannot be opened or the final
# flush/close fails.
sub dbinfoSave($)
{
    my $self=shift;

    # three-arg open: the path is never interpreted as a mode or command
    open(my $out, '>', $self->{moviedbInfo}) || return(1);
    for my $key (sort keys %{$self->{dbinfo}}) {
        print {$out} "".$key.":".$self->{dbinfo}->{$key}."\n";
    }
    # buffered write errors (e.g. disk full) only surface at close time
    close($out) || return(1);
    return(0);
}
1309
|
|
|
|
|
|
|
|
1310
|
|
|
|
|
|
|
# Return the (uncompressed) size in bytes of the list file registered
# under $key in $self->{listFiles}.
#
# For a path ending in ".gz" the uncompressed size is taken from the output
# of "gzip -l"; when gzip cannot be run or its output cannot be parsed, the
# size is estimated from the compressed size assuming 65% compression.
sub dbinfoGetFileSize($$)
{
    my ($self, $key)=@_;

    # NOTE(review): this guard only fires if paths_isset() returns undef for
    # an unknown key - confirm against the list-files class
    if ( !defined($self->{listFiles}->paths_isset($key) ) ) {
        die ("invalid call for $key");
    }
    my $filePath=$self->{listFiles}->paths_index($key);
    my $fileSize=int(-s $filePath);

    # literal ".gz" suffix only (the dot used to match any character)
    if ( $filePath=~m/\.gz$/ ) {
        # default to wild ass guess of compression of 65%; overwritten below
        # when gzip reports the real figure
        $fileSize=int(($fileSize*100)/(100-65));

        # list-form pipe open: the path is passed to gzip as a single
        # argument and never goes through a shell
        if ( open(my $fd, '-|', 'gzip', '-l', $filePath) ) {
            while(my $row=<$fd>) {
                # data row looks like "<compressed> <uncompressed> ..."
                if ( $row=~m/^\s*\d+\s+(\d+)/ ) {
                    $fileSize=$1;
                }
            }
            close($fd);
        }
    }
    return($fileSize);
}
1341
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
# Open or close the stage log file handle kept in $self->{logfd}.
#
#   $file  path of the log file to (re)create, or undef to close the
#          currently open log
#
# Opening also resets $self->{errorCountInLog} to 0. Returns 1 on success,
# 0 (after printing the reason to STDERR) when the file cannot be opened.
sub _redirect($$)
{
    my ($self, $file)=@_;

    if ( defined($file) ) {
        # three-arg open: the path is never interpreted as a mode or command
        if ( !open($self->{logfd}, '>', $file) ) {
            print STDERR "$file:$!\n";
            return(0);
        }
        $self->{errorCountInLog}=0;
    }
    else {
        # nothing to close if no log was ever opened
        close($self->{logfd}) if ( defined($self->{logfd}) );
        $self->{logfd}=undef;
    }
    return(1);
}
1359
|
|
|
|
|
|
|
|
1360
|
|
|
|
|
|
|
=head2 importListComplete |
1361
|
|
|
|
|
|
|
|
1362
|
|
|
|
|
|
|
Check to see if the specified list file has been successfully imported |
1363
|
|
|
|
|
|
|
|
1364
|
|
|
|
|
|
|
=cut |
1365
|
|
|
|
|
|
|
|
1366
|
|
|
|
|
|
|
# Report whether a stage log exists for the given list type, i.e. whether
# "$self->{imdbDir}/stage-$type.log" is a plain file.
# Returns 1 when it does, 0 otherwise.
sub importListComplete($)
{
    my $self=shift;
    my $listType=shift;

    my $found=( -f "$self->{imdbDir}/stage-$listType.log" );
    return( $found ? 1 : 0 );
}
1375
|
|
|
|
|
|
|
|
1376
|
|
|
|
|
|
|
# Prepare an import stage: create the database object for
# "$self->{imdbDir}/imdb.db", redirect logging to the stage's log file and
# connect.
#
#   $type  list type of the stage being started
#
# For the 'movies' stage everything is started afresh: $DB->delete() is
# called and the stage logs of all list types are removed.
#
# Returns the connected database object, or 1 when the stage log cannot be
# opened. Dies when the database connection fails.
sub _prepStage
{
    my ($self, $type)=@_;

    # arrow call instead of "new IMDB::Local::DB(...)": the indirect form is
    # ambiguous in a package that has a new() of its own
    my $DB=IMDB::Local::DB->new(database=>"$self->{imdbDir}/imdb.db");

    # if we're restarting, lets start fresh
    if ( $type eq 'movies' ) {
        #warn("recreating db ".$DB->database());
        $DB->delete();

        for my $listType ( $self->listTypes() ) {
            unlink("$self->{imdbDir}/stage-$listType.log");
        }
    }

    # plain interpolation: $type is no longer part of a sprintf format string
    if ( !$self->_redirect("$self->{imdbDir}/stage-$type.log") ) {
        return(1);
    }

    if ( !$DB->connect() ) {
        die "moviedb connect failed:$DBI::errstr";
    }
    $DB->runSQL("PRAGMA synchronous = OFF");
    return($DB);
}
1404
|
|
|
|
|
|
|
|
1405
|
|
|
|
|
|
|
# Finish an import stage: commit and disconnect the given database object,
# then close the stage log by calling _redirect(undef).
# Returns whatever _redirect() returns.
sub _unprepStage
{
    my $self=shift;
    my $stageDB=shift;

    $stageDB->commit();
    $stageDB->disconnect();

    return($self->_redirect(undef));
}
1414
|
|
|
|
|
|
|
|
1415
|
|
|
|
|
|
|
sub importListFile($$$) |
1416
|
|
|
|
|
|
|
{ |
1417
|
|
|
|
|
|
|
my ($self, $DB, $type)=@_; |
1418
|
|
|
|
|
|
|
|
1419
|
|
|
|
|
|
|
|
1420
|
|
|
|
|
|
|
if ( !grep(/^$type$/, $self->listTypes()) ) { |
1421
|
|
|
|
|
|
|
die "invalid type $type"; |
1422
|
|
|
|
|
|
|
} |
1423
|
|
|
|
|
|
|
|
1424
|
|
|
|
|
|
|
my $dbinfoCalcEstimate=sub { |
1425
|
|
|
|
|
|
|
my ($self, $key)=@_; |
1426
|
|
|
|
|
|
|
|
1427
|
|
|
|
|
|
|
my %estimateSizePerEntry=(movies=>47, |
1428
|
|
|
|
|
|
|
directors=>258, |
1429
|
|
|
|
|
|
|
actors=>695, |
1430
|
|
|
|
|
|
|
actresses=>779, |
1431
|
|
|
|
|
|
|
genres=>38, |
1432
|
|
|
|
|
|
|
ratings=>68, |
1433
|
|
|
|
|
|
|
keywords=>47, |
1434
|
|
|
|
|
|
|
plot=>804); |
1435
|
|
|
|
|
|
|
my $fileSize=$self->dbinfoGetFileSize($key); |
1436
|
|
|
|
|
|
|
|
1437
|
|
|
|
|
|
|
my $countEstimate=int($fileSize/$estimateSizePerEntry{$key}); |
1438
|
|
|
|
|
|
|
|
1439
|
|
|
|
|
|
|
my $filePath=$self->{listFiles}->paths_index($key); |
1440
|
|
|
|
|
|
|
|
1441
|
|
|
|
|
|
|
|
1442
|
|
|
|
|
|
|
$self->dbinfoAdd($key."_list_file", $filePath); |
1443
|
|
|
|
|
|
|
$self->dbinfoAdd($key."_list_file_size", "".int(-s $filePath)); |
1444
|
|
|
|
|
|
|
$self->dbinfoAdd($key."_list_file_size_uncompressed", $fileSize); |
1445
|
|
|
|
|
|
|
$self->dbinfoAdd($key."_list_count_estimate", $countEstimate); |
1446
|
|
|
|
|
|
|
return($countEstimate); |
1447
|
|
|
|
|
|
|
}; |
1448
|
|
|
|
|
|
|
|
1449
|
|
|
|
|
|
|
my $dbinfoCalcBytesPerEntry = sub { |
1450
|
|
|
|
|
|
|
my ($self, $key, $calcActualForThisNumber)=@_; |
1451
|
|
|
|
|
|
|
my $fileSize=$self->dbinfoGetFileSize($key); |
1452
|
|
|
|
|
|
|
return(int($fileSize/$calcActualForThisNumber)); |
1453
|
|
|
|
|
|
|
}; |
1454
|
|
|
|
|
|
|
|
1455
|
|
|
|
|
|
|
|
1456
|
|
|
|
|
|
|
if ( $type eq 'movies') { |
1457
|
|
|
|
|
|
|
my $countEstimate=&$dbinfoCalcEstimate($self, "movies"); |
1458
|
|
|
|
|
|
|
|
1459
|
|
|
|
|
|
|
my $num=$self->importMovies($countEstimate, $self->{listFiles}->paths_index('movies'), $DB); |
1460
|
|
|
|
|
|
|
if ( $num < 0 ) { |
1461
|
|
|
|
|
|
|
if ( $num == -2 ) { |
1462
|
|
|
|
|
|
|
$self->error("you need to download ".$self->{listFiles}->paths_index('movies')." from ftp.imdb.com"); |
1463
|
|
|
|
|
|
|
} |
1464
|
|
|
|
|
|
|
return(1); |
1465
|
|
|
|
|
|
|
} |
1466
|
|
|
|
|
|
|
elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) { |
1467
|
|
|
|
|
|
|
my $better=&$dbinfoCalcBytesPerEntry($self, "movies", $num); |
1468
|
|
|
|
|
|
|
$self->status("ARG estimate of $countEstimate for movies needs updating, found $num ($better bytes/entry)"); |
1469
|
|
|
|
|
|
|
} |
1470
|
|
|
|
|
|
|
|
1471
|
|
|
|
|
|
|
open(OUT, "> $self->{imdbDir}/titles.tsv") || die "$self->{imdbDir}/titles.tsv:$!"; |
1472
|
|
|
|
|
|
|
for my $mkey (sort keys %{$self->{imdbMovie2DBKey}}) { |
1473
|
|
|
|
|
|
|
print OUT "".$self->{imdbMovie2DBKey}->{$mkey}."\t".$mkey."\n"; |
1474
|
|
|
|
|
|
|
} |
1475
|
|
|
|
|
|
|
close(OUT); |
1476
|
|
|
|
|
|
|
|
1477
|
|
|
|
|
|
|
$self->dbinfoAdd("db_stat_movie_count", "$num"); |
1478
|
|
|
|
|
|
|
return(0); |
1479
|
|
|
|
|
|
|
} |
1480
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
# read in keys so we have them for follow-up stages |
1482
|
|
|
|
|
|
|
if ( !defined($self->{imdbMovie2DBKey}) ) { |
1483
|
|
|
|
|
|
|
#$self->{imdbMovie2DBKey}=$DB->select2Hash("select IMDBKey, TitleID from Titles"); |
1484
|
|
|
|
|
|
|
|
1485
|
|
|
|
|
|
|
if ( 1 ) { |
1486
|
|
|
|
|
|
|
open(IN, "< $self->{imdbDir}/titles.tsv") || die "$self->{imdbDir}/titles.tsv:$!"; |
1487
|
|
|
|
|
|
|
while () { |
1488
|
|
|
|
|
|
|
chomp(); |
1489
|
|
|
|
|
|
|
if ( m/^(\d+)\t(.+)/o ) { |
1490
|
|
|
|
|
|
|
$self->{imdbMovie2DBKey}->{$2}=$1; |
1491
|
|
|
|
|
|
|
} |
1492
|
|
|
|
|
|
|
} |
1493
|
|
|
|
|
|
|
close(IN); |
1494
|
|
|
|
|
|
|
} |
1495
|
|
|
|
|
|
|
} |
1496
|
|
|
|
|
|
|
|
1497
|
|
|
|
|
|
|
# need to read-movie kesy |
1498
|
|
|
|
|
|
|
if ( $type eq 'directors') { |
1499
|
|
|
|
|
|
|
|
1500
|
|
|
|
|
|
|
my $countEstimate=&$dbinfoCalcEstimate($self, "directors"); |
1501
|
|
|
|
|
|
|
|
1502
|
|
|
|
|
|
|
my $num=$self->importDirectors($countEstimate, $self->{listFiles}->paths_index('directors'), $DB); |
1503
|
|
|
|
|
|
|
if ( $num < 0 ) { |
1504
|
|
|
|
|
|
|
if ( $num == -2 ) { |
1505
|
|
|
|
|
|
|
$self->error("you need to download ".$self->{listFiles}->paths_index('directors')." from ftp.imdb.com (see http://www.imdb.com/interfaces)"); |
1506
|
|
|
|
|
|
|
} |
1507
|
|
|
|
|
|
|
return(1); |
1508
|
|
|
|
|
|
|
} |
1509
|
|
|
|
|
|
|
elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) { |
1510
|
|
|
|
|
|
|
my $better=&$dbinfoCalcBytesPerEntry($self, "directors", $num); |
1511
|
|
|
|
|
|
|
$self->status("ARG estimate of $countEstimate for directors needs updating, found $num ($better bytes/entry)"); |
1512
|
|
|
|
|
|
|
} |
1513
|
|
|
|
|
|
|
$self->dbinfoAdd("db_stat_director_count", "$num"); |
1514
|
|
|
|
|
|
|
return(0); |
1515
|
|
|
|
|
|
|
} |
1516
|
|
|
|
|
|
|
|
1517
|
|
|
|
|
|
|
if ( $type eq 'actors') { |
1518
|
|
|
|
|
|
|
|
1519
|
|
|
|
|
|
|
#print "re-reading movies into memory for reverse lookup..\n"; |
1520
|
|
|
|
|
|
|
my $countEstimate=&$dbinfoCalcEstimate($self, "actors"); |
1521
|
|
|
|
|
|
|
|
1522
|
|
|
|
|
|
|
#my $num=$self->readCast("Actors", $countEstimate, "$self->{imdbListFiles}->{actors}"); |
1523
|
|
|
|
|
|
|
my $num=$self->importActors("Actors", $countEstimate, $self->{listFiles}->paths_index('actors'), $DB); |
1524
|
|
|
|
|
|
|
if ( $num < 0 ) { |
1525
|
|
|
|
|
|
|
if ( $num == -2 ) { |
1526
|
|
|
|
|
|
|
$self->error("you need to download ".$self->{listFiles}->paths_index('actors')." from ftp.imdb.com (see http://www.imdb.com/interfaces)"); |
1527
|
|
|
|
|
|
|
} |
1528
|
|
|
|
|
|
|
return(1); |
1529
|
|
|
|
|
|
|
} |
1530
|
|
|
|
|
|
|
elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) { |
1531
|
|
|
|
|
|
|
my $better=&$dbinfoCalcBytesPerEntry($self, "actors", $num); |
1532
|
|
|
|
|
|
|
$self->status("ARG estimate of $countEstimate for actors needs updating, found $num ($better bytes/entry)"); |
1533
|
|
|
|
|
|
|
} |
1534
|
|
|
|
|
|
|
$self->dbinfoAdd("db_stat_actor_count", "$num"); |
1535
|
|
|
|
|
|
|
|
1536
|
|
|
|
|
|
|
return(0); |
1537
|
|
|
|
|
|
|
} |
1538
|
|
|
|
|
|
|
|
1539
|
|
|
|
|
|
|
if ( $type eq 'actresses') { |
1540
|
|
|
|
|
|
|
|
1541
|
|
|
|
|
|
|
my $countEstimate=&$dbinfoCalcEstimate($self, "actresses"); |
1542
|
|
|
|
|
|
|
my $num=$self->importActors("Actresses", $countEstimate, $self->{listFiles}->paths_index('actresses'), $DB); |
1543
|
|
|
|
|
|
|
if ( $num < 0 ) { |
1544
|
|
|
|
|
|
|
if ( $num == -2 ) { |
1545
|
|
|
|
|
|
|
$self->error("you need to download ".$self->{listFiles}->paths_index('actresses')." from ftp.imdb.com (see http://www.imdb.com/interfaces)"); |
1546
|
|
|
|
|
|
|
} |
1547
|
|
|
|
|
|
|
return(1); |
1548
|
|
|
|
|
|
|
} |
1549
|
|
|
|
|
|
|
elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) { |
1550
|
|
|
|
|
|
|
my $better=&$dbinfoCalcBytesPerEntry($self, "actresses", $num); |
1551
|
|
|
|
|
|
|
$self->status("ARG estimate of $countEstimate for actresses needs updating, found $num ($better bytes/entry)"); |
1552
|
|
|
|
|
|
|
} |
1553
|
|
|
|
|
|
|
$self->dbinfoAdd("db_stat_actress_count", "$num"); |
1554
|
|
|
|
|
|
|
|
1555
|
|
|
|
|
|
|
return(0); |
1556
|
|
|
|
|
|
|
} |
1557
|
|
|
|
|
|
|
|
1558
|
|
|
|
|
|
|
if ( $type eq 'genres') { |
1559
|
|
|
|
|
|
|
my $countEstimate=&$dbinfoCalcEstimate($self, "genres"); |
1560
|
|
|
|
|
|
|
|
1561
|
|
|
|
|
|
|
my $num=$self->importGenres($countEstimate, $self->{listFiles}->paths_index('genres'), $DB); |
1562
|
|
|
|
|
|
|
if ( $num < 0 ) { |
1563
|
|
|
|
|
|
|
if ( $num == -2 ) { |
1564
|
|
|
|
|
|
|
$self->error("you need to download ".$self->{listFiles}->paths_index('genres')." from ftp.imdb.com"); |
1565
|
|
|
|
|
|
|
} |
1566
|
|
|
|
|
|
|
return(1); |
1567
|
|
|
|
|
|
|
} |
1568
|
|
|
|
|
|
|
elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) { |
1569
|
|
|
|
|
|
|
my $better=&$dbinfoCalcBytesPerEntry($self, "genres", $num); |
1570
|
|
|
|
|
|
|
$self->status("ARG estimate of $countEstimate for genres needs updating, found $num ($better bytes/entry)"); |
1571
|
|
|
|
|
|
|
} |
1572
|
|
|
|
|
|
|
$self->dbinfoAdd("db_stat_genres_count", "$num"); |
1573
|
|
|
|
|
|
|
|
1574
|
|
|
|
|
|
|
return(0); |
1575
|
|
|
|
|
|
|
} |
1576
|
|
|
|
|
|
|
|
1577
|
|
|
|
|
|
|
if ( $type eq 'ratings') { |
1578
|
|
|
|
|
|
|
my $countEstimate=&$dbinfoCalcEstimate($self, "ratings"); |
1579
|
|
|
|
|
|
|
|
1580
|
|
|
|
|
|
|
my $num=$self->importRatings($countEstimate, $self->{listFiles}->paths_index('ratings'), $DB); |
1581
|
|
|
|
|
|
|
if ( $num < 0 ) { |
1582
|
|
|
|
|
|
|
if ( $num == -2 ) { |
1583
|
|
|
|
|
|
|
$self->error("you need to download ".$self->{listFiles}->paths_index('ratings')." from ftp.imdb.com"); |
1584
|
|
|
|
|
|
|
} |
1585
|
|
|
|
|
|
|
return(1); |
1586
|
|
|
|
|
|
|
} |
1587
|
|
|
|
|
|
|
elsif ( abs($num - $countEstimate) > $countEstimate*.10 ) { |
1588
|
|
|
|
|
|
|
my $better=&$dbinfoCalcBytesPerEntry($self, "ratings", $num); |
1589
|
|
|
|
|
|
|
$self->status("ARG estimate of $countEstimate for ratings needs updating, found $num ($better bytes/entry)"); |
1590
|
|
|
|
|
|
|
} |
1591
|
|
|
|
|
|
|
$self->dbinfoAdd("db_stat_ratings_count", "$num"); |
1592
|
|
|
|
|
|
|
|
1593
|
|
|
|
|
|
|
return(0); |
1594
|
|
|
|
|
|
|
} |
1595
|
|
|
|
|
|
|
|
1596
|
|
|
|
|
|
|
if ( $type eq 'keywords') { |
1597
|
|
|
|
|
|
|
|
1598
|
|
|
|
|
|
|
if ( ! -f $self->{listFiles}->paths_index('keywords') ) { |
1599
|
|
|
|
|
|
|
$self->status("no keywords file downloaded, see --with-keywords details in documentation"); |
1600
|
|
|
|
|
|
|
return(0); |
1601
|
|
|
|
|
|
|
} |
1602
|
|
|
|
|
|
|
my $countEstimate=&$dbinfoCalcEstimate($self, "keywords"); |
1603
|
|
|
|
|
|
|
#my $countEstimate=5554178; |
1604
|
|
|
|
|
|
|
|
1605
|
|
|
|
|
|
|
my $num=$self->importKeywords($countEstimate, $self->{listFiles}->paths_index('keywords'), $DB); |
1606
|
|
|
|
|
|
|
if ( $num < 0 ) { |
1607
|
|
|
|
|
|
|
if ( $num == -2 ) { |
1608
|
|
|
|
|
|
|
$self->error("you need to download ".$self->{listFiles}->paths_index('keywords')." from ftp.imdb.com"); |
1609
|
|
|
|
|
|
|
} |
1610
|
|
|
|
|
|
|
return(1); |
1611
|
|
|
|
|
|
|
} |
1612
|
|
|
|
|
|
|
elsif ( abs($num - $countEstimate) > $countEstimate*.05 ) { |
1613
|
|
|
|
|
|
|
$self->status("ARG estimate of $countEstimate for keywords needs updating, found $num"); |
1614
|
|
|
|
|
|
|
} |
1615
|
|
|
|
|
|
|
$self->dbinfoAdd("keywords_list_file", $self->{listFiles}->paths_index('keywords')); |
1616
|
|
|
|
|
|
|
$self->dbinfoAdd("keywords_list_file_size", -s $self->{listFiles}->paths_index('keywords')); |
1617
|
|
|
|
|
|
|
$self->dbinfoAdd("db_stat_keywords_count", "$num"); |
1618
|
|
|
|
|
|
|
|
1619
|
|
|
|
|
|
|
return(0); |
1620
|
|
|
|
|
|
|
} |
1621
|
|
|
|
|
|
|
|
1622
|
|
|
|
|
|
|
if ( $type eq 'plot') { |
1623
|
|
|
|
|
|
|
|
1624
|
|
|
|
|
|
|
if ( ! -f $self->{listFiles}->paths_index('plot') ) { |
1625
|
|
|
|
|
|
|
$self->status("no plot file downloaded, see --with-plot details in documentation"); |
1626
|
|
|
|
|
|
|
return(0); |
1627
|
|
|
|
|
|
|
} |
1628
|
|
|
|
|
|
|
|
1629
|
|
|
|
|
|
|
my $countEstimate=&$dbinfoCalcEstimate($self, "plot"); |
1630
|
|
|
|
|
|
|
my $num=$self->importPlots($countEstimate, $self->{listFiles}->paths_index('plot'), $DB); |
1631
|
|
|
|
|
|
|
if ( $num < 0 ) { |
1632
|
|
|
|
|
|
|
if ( $num == -2 ) { |
1633
|
|
|
|
|
|
|
$self->error("you need to download ".$self->{listFiles}->paths_index('plot')." from ftp.imdb.com"); |
1634
|
|
|
|
|
|
|
} |
1635
|
|
|
|
|
|
|
return(1); |
1636
|
|
|
|
|
|
|
} |
1637
|
|
|
|
|
|
|
elsif ( abs($num - $countEstimate) > $countEstimate*.05 ) { |
1638
|
|
|
|
|
|
|
$self->status("ARG estimate of $countEstimate for plots needs updating, found $num"); |
1639
|
|
|
|
|
|
|
} |
1640
|
|
|
|
|
|
|
$self->dbinfoAdd("plots_list_file", $self->{listFiles}->paths_index('plot')); |
1641
|
|
|
|
|
|
|
$self->dbinfoAdd("plots_list_file_size", -s $self->{listFiles}->paths_index('plot')); |
1642
|
|
|
|
|
|
|
$self->dbinfoAdd("db_stat_plots_count", "$num"); |
1643
|
|
|
|
|
|
|
|
1644
|
|
|
|
|
|
|
return(0); |
1645
|
|
|
|
|
|
|
} |
1646
|
|
|
|
|
|
|
|
1647
|
|
|
|
|
|
|
$self->error("invalid type $type"); |
1648
|
|
|
|
|
|
|
return(1); |
1649
|
|
|
|
|
|
|
} |
1650
|
|
|
|
|
|
|
|
1651
|
|
|
|
|
|
|
=head2 importList |
1652
|
|
|
|
|
|
|
|
1653
|
|
|
|
|
|
|
Import a list file from 'listsDir' into the IMDB::Local Database. |
1654
|
|
|
|
|
|
|
|
1655
|
|
|
|
|
|
|
=cut |
1656
|
|
|
|
|
|
|
|
1657
|
|
|
|
|
|
|
sub importList($$)
{
    my ($self, $type)=@_;

    # _prepStage() hands back the handle that is used for the rest of
    # this stage (it is passed to importListFile() and _unprepStage()).
    my $DB=$self->_prepStage($type);

    # pull in the previously saved stats before recording anything new
    $self->dbinfoLoad();

    my $startTime=time();
    my $importStatus=$self->importListFile($DB, $type);

    # any non-zero status is a failed import: drop the connection and
    # report failure to the caller without saving stats
    if ( $importStatus != 0 ) {
        $DB->disconnect();
        return(1);
    }

    # remember how long this stage took, keyed by list type
    my $elapsedSeconds=time()-$startTime;
    $self->dbinfoAdd("seconds_to_complete_prep_stage_$type", $elapsedSeconds);
    $self->dbinfoSave();

    $self->_unprepStage($DB);
    return(0);
}
1678
|
|
|
|
|
|
|
|
1679
|
|
|
|
|
|
|
|
1680
|
|
|
|
|
|
|
# Stamp the database info with this module's version, then run
# sanityCheckDatabase() through a fresh IMDB::Local instance.
#
# Arguments:
#   $self - the IMDB::Local object; reads its imdbDir, verbose,
#           moviedbInfo and moviedbOffline entries.
#
# Returns 0 when sanityCheckDatabase() returns a false value, and 1 when
# dbinfoSave() returns true (treated as failure here) or when the check
# returns an error line.  In the latter case the error line and the
# contents returned by loadDBInfo() are written to $self->{moviedbOffline}
# and every line of that file is then passed to $self->error().
#
# Dies if the offline file cannot be opened for writing or reading, or if
# closing it after writing fails.
sub _NOT_USED_checkSantity($)
{
    my ($self)=@_;

    $self->dbinfoAdd("db_version", $IMDB::Local::VERSION);

    if ( $self->dbinfoSave() ) {
        $self->error("$self->{moviedbInfo}:$!");
        return(1);
    }

    $self->status("running quick sanity check on database indexes...");
    my $imdb=IMDB::Local->new('imdbDir' => $self->{imdbDir},
                              'verbose' => $self->{verbose});

    # remove any offline marker that already exists before re-checking
    if ( -e "$self->{moviedbOffline}" ) {
        unlink("$self->{moviedbOffline}");
    }

    if ( my $errline=$imdb->sanityCheckDatabase() ) {
        # three-arg open with a lexical handle: the path is never parsed
        # for a mode and the handle cannot collide with other code
        open(my $offlineOut, '>', $self->{moviedbOffline})
            or die "$self->{moviedbOffline}:$!";
        print {$offlineOut} $errline."\n";
        print {$offlineOut} "one of the prep stages' must have produced corrupt data\n";
        print {$offlineOut} "report the following details to xmltv-devel\@lists.sf.net\n";

        my $info=loadDBInfo($self->{moviedbInfo});
        if ( ref $info eq 'HASH' ) {
            for my $key (sort keys %{$info}) {
                print {$offlineOut} "\t$key:$info->{$key}\n";
            }
        }
        else {
            # a non-hash result is printed as-is after the corrupt notice
            print {$offlineOut} "\tdbinfo file corrupt\n";
            print {$offlineOut} "\t$info";
        }
        print {$offlineOut} "database taken offline\n";
        # buffered write errors only surface at close, so check it
        close($offlineOut) or die "$self->{moviedbOffline}:$!";

        # echo the report just written through the error channel, one
        # line at a time; the loop must read from the handle, otherwise
        # an empty while() condition never terminates
        open(my $offlineIn, '<', $self->{moviedbOffline})
            or die "$self->{moviedbOffline}:$!";
        while ( my $line=<$offlineIn> ) {
            chomp($line);
            $self->error($line);
        }
        close($offlineIn);
        return(1);
    }
    $self->status("sanity intact :)");
    return(0);
}
1728
|
|
|
|
|
|
|
|
1729
|
|
|
|
|
|
|
=head1 AUTHOR |
1730
|
|
|
|
|
|
|
|
1731
|
|
|
|
|
|
|
jerryv, C<< >> |
1732
|
|
|
|
|
|
|
|
1733
|
|
|
|
|
|
|
=head1 BUGS |
1734
|
|
|
|
|
|
|
|
1735
|
|
|
|
|
|
|
Please report any bugs or feature requests to C, or through |
1736
|
|
|
|
|
|
|
the web interface at L. I will be notified, and then you'll |
1737
|
|
|
|
|
|
|
automatically be notified of progress on your bug as I make changes. |
1738
|
|
|
|
|
|
|
|
1739
|
|
|
|
|
|
|
|
1740
|
|
|
|
|
|
|
|
1741
|
|
|
|
|
|
|
|
1742
|
|
|
|
|
|
|
=head1 SUPPORT |
1743
|
|
|
|
|
|
|
|
1744
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
1745
|
|
|
|
|
|
|
|
1746
|
|
|
|
|
|
|
perldoc IMDB::Local |
1747
|
|
|
|
|
|
|
|
1748
|
|
|
|
|
|
|
|
1749
|
|
|
|
|
|
|
You can also look for information at: |
1750
|
|
|
|
|
|
|
|
1751
|
|
|
|
|
|
|
=over 4 |
1752
|
|
|
|
|
|
|
|
1753
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here) |
1754
|
|
|
|
|
|
|
|
1755
|
|
|
|
|
|
|
L |
1756
|
|
|
|
|
|
|
|
1757
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
1758
|
|
|
|
|
|
|
|
1759
|
|
|
|
|
|
|
L |
1760
|
|
|
|
|
|
|
|
1761
|
|
|
|
|
|
|
=item * CPAN Ratings |
1762
|
|
|
|
|
|
|
|
1763
|
|
|
|
|
|
|
L |
1764
|
|
|
|
|
|
|
|
1765
|
|
|
|
|
|
|
=item * Search CPAN |
1766
|
|
|
|
|
|
|
|
1767
|
|
|
|
|
|
|
L |
1768
|
|
|
|
|
|
|
|
1769
|
|
|
|
|
|
|
=back |
1770
|
|
|
|
|
|
|
|
1771
|
|
|
|
|
|
|
|
1772
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
1773
|
|
|
|
|
|
|
|
1774
|
|
|
|
|
|
|
|
1775
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
1776
|
|
|
|
|
|
|
|
1777
|
|
|
|
|
|
|
Copyright 2015 jerryv. |
1778
|
|
|
|
|
|
|
|
1779
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
1780
|
|
|
|
|
|
|
under the terms of the Artistic License (2.0). You may obtain a |
1781
|
|
|
|
|
|
|
copy of the full license at: |
1782
|
|
|
|
|
|
|
|
1783
|
|
|
|
|
|
|
L |
1784
|
|
|
|
|
|
|
|
1785
|
|
|
|
|
|
|
Any use, modification, and distribution of the Standard or Modified |
1786
|
|
|
|
|
|
|
Versions is governed by this Artistic License. By using, modifying or |
1787
|
|
|
|
|
|
|
distributing the Package, you accept this license. Do not use, modify, |
1788
|
|
|
|
|
|
|
or distribute the Package, if you do not accept this license. |
1789
|
|
|
|
|
|
|
|
1790
|
|
|
|
|
|
|
If your Modified Version has been derived from a Modified Version made |
1791
|
|
|
|
|
|
|
by someone other than you, you are nevertheless required to ensure that |
1792
|
|
|
|
|
|
|
your Modified Version complies with the requirements of this license. |
1793
|
|
|
|
|
|
|
|
1794
|
|
|
|
|
|
|
This license does not grant you the right to use any trademark, service |
1795
|
|
|
|
|
|
|
mark, tradename, or logo of the Copyright Holder. |
1796
|
|
|
|
|
|
|
|
1797
|
|
|
|
|
|
|
This license includes the non-exclusive, worldwide, free-of-charge |
1798
|
|
|
|
|
|
|
patent license to make, have made, use, offer to sell, sell, import and |
1799
|
|
|
|
|
|
|
otherwise transfer the Package with respect to any patent claims |
1800
|
|
|
|
|
|
|
licensable by the Copyright Holder that are necessarily infringed by the |
1801
|
|
|
|
|
|
|
Package. If you institute patent litigation (including a cross-claim or |
1802
|
|
|
|
|
|
|
counterclaim) against any party alleging that the Package constitutes |
1803
|
|
|
|
|
|
|
direct or contributory patent infringement, then this Artistic License |
1804
|
|
|
|
|
|
|
to you shall terminate on the date that such litigation is filed. |
1805
|
|
|
|
|
|
|
|
1806
|
|
|
|
|
|
|
Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER |
1807
|
|
|
|
|
|
|
AND CONTRIBUTORS "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. |
1808
|
|
|
|
|
|
|
THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR |
1809
|
|
|
|
|
|
|
PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY |
1810
|
|
|
|
|
|
|
YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR |
1811
|
|
|
|
|
|
|
CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR |
1812
|
|
|
|
|
|
|
CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE, |
1813
|
|
|
|
|
|
|
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
1814
|
|
|
|
|
|
|
|
1815
|
|
|
|
|
|
|
|
1816
|
|
|
|
|
|
|
=cut |
1817
|
|
|
|
|
|
|
|
1818
|
|
|
|
|
|
|
1; # End of IMDB::Local |