| blib/lib/WWW/Sucksub/Extratitles.pm | |||
|---|---|---|---|
| Criterion | Covered | Total | % |
| statement | 22 | 24 | 91.6 |
| branch | n/a | ||
| condition | n/a | ||
| subroutine | 8 | 8 | 100.0 |
| pod | n/a | ||
| total | 30 | 32 | 93.7 |
| line | stmt | bran | cond | sub | pod | time | code |
|---|---|---|---|---|---|---|---|
| 1 | package WWW::Sucksub::Extratitles; | ||||||
| 2 | |||||||
| 3 | |||||||
| 4 | =head1 NAME | ||||||
| 5 | |||||||
| 6 | WWW::Sucksub::Extratitles - automated access to Extratitles.com | ||||||
| 7 | |||||||
| 8 | =head1 VERSION | ||||||
| 9 | |||||||
| 10 | Version 0.01 | ||||||
| 11 | |||||||
| 12 | =cut | ||||||
| 13 | |||||||
| 14 | our $VERSION = '0.01'; | ||||||
| 15 | |||||||
| 16 | =head1 SYNOPSIS | ||||||
| 17 | |||||||
| 18 | SuckSub::Extratitles is a web automat based on the WWW::Mechanize Module | ||||||
| 19 | This module search and collect distant result on the Extratitles.com database. | ||||||
| 20 | Subtitles Files are very little files, Sucksub::Divstation store all results | ||||||
| 21 | in a dbm file that you can exploit to retrieve any subtitles information. | ||||||
| 22 | |||||||
| 23 | |||||||
| 24 | |||||||
| 25 | use WWW::Sucksub::Extratitles; | ||||||
| 26 | my $foo = WWW::Sucksub::Extratitles->new( | ||||||
| 27 | dbfile=> '/where/your/DBM/file is.db', | ||||||
| 28 | html =>'/where/your/html/repport/is.html', | ||||||
| 29 | motif=> 'the word(s) you search', | ||||||
| 30 | debug=> 1, | ||||||
| 31 | language=>'English' | ||||||
| 32 | logout => '/where/your/debug/info/are/written.log', ); | ||||||
| 33 | $foo->update(); # collect all link corresponding to the $foo->motif() | ||||||
| 34 | $foo->motif('x'); # modify the search criteria | ||||||
| 35 | $foo->search(); # launch a search on the local database | ||||||
| 36 | |||||||
| 37 | |||||||
| 38 | |||||||
| 39 | =head1 CONSTRUCTOR AND STARTUP | ||||||
| 40 | |||||||
| 41 | =head2 Extratitles Constructor | ||||||
| 42 | |||||||
| 43 | The new() constructor, is associated to default values : | ||||||
| 44 | you can modify these one as shown in the synopsis example. | ||||||
| 45 | |||||||
| 46 | my $foo = WWW::Sucksub::Extratitles->new( | ||||||
| 47 | html=> "$ENV{HOME}"."/sksb_Extratitles_report.html", | ||||||
| 48 | dbfile=> "$ENV{HOME}"."/sksb_Extratitles_db.db", | ||||||
| 49 | motif=> undef, | ||||||
| 50 | debug=> 0, | ||||||
| 51 | language => 'English' | ||||||
| 52 | logout => undef, # i.e. *STDOUT | ||||||
| 53 | useragent=> "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.5) Gecko/20031007" | ||||||
| 54 | ); | ||||||
| 55 | |||||||
| 56 | =head3 new() constructor attributes and associated methods | ||||||
| 57 | |||||||
| 58 | Few attributes can be set thru new() contructor's attributes. | ||||||
| 59 | All attributes can be modified by corresponding methods: | ||||||
| 60 | |||||||
| 61 | $foo->WWW::Sucksub::Extratitles->new() | ||||||
| 62 | $foo->useragent() # get the useragent attribute value | ||||||
| 63 | $foo->useragent('tructruc') # set the useragent attribute value to 'tructruc' | ||||||
| 64 | |||||||
| 65 | |||||||
| 66 | =head4 cookies_file() | ||||||
| 67 | |||||||
| 68 | arg must be a file, this default value can be modified by calling the | ||||||
| 69 | |||||||
| 70 | $foo->cookies_file('/where/my/cookies/are.txt') | ||||||
| 71 | |||||||
| 72 | modify the default value positionned by the new constructor. | ||||||
| 73 | |||||||
| 74 | $foo->cookies_file() | ||||||
| 75 | |||||||
| 76 | return the actual value of the cookies file path. | ||||||
| 77 | |||||||
| 78 | =head4 useragent() | ||||||
| 79 | |||||||
| 80 | arg should be a valid useragent. There's no reason to change this default value. | ||||||
| 81 | |||||||
| 82 | $foo->useragent() | ||||||
| 83 | |||||||
| 84 | return the value of the current useragent. | ||||||
| 85 | |||||||
| 86 | =head4 motif() | ||||||
| 87 | |||||||
| 88 | you should here give a real value to this function : | ||||||
| 89 | if $foo->motif is undef, the package execution will be aborted | ||||||
| 90 | |||||||
| 91 | $foo->motif('xxx') | ||||||
| 92 | |||||||
| 93 | allows to precise that you're searching a word that contains 'xxx' | ||||||
| 94 | |||||||
| 95 | $foo->motif() | ||||||
| 96 | |||||||
| 97 | return the current value of the string you search. | ||||||
| 98 | |||||||
| 99 | =head4 language() | ||||||
| 100 | |||||||
| 101 | Allows to set the langage for the subtitle search. | ||||||
| 102 | |||||||
| 103 | Default value is 0 : it means that all langages will be returned | ||||||
| 104 | |||||||
| 105 | $foo->langage('french') | ||||||
| 106 | |||||||
| 107 | allows to precise that you're searching a french subtitles | ||||||
| 108 | Common langages string values are : | ||||||
| 109 | |||||||
| 110 | Albanian | ||||||
| 111 | Argentino | ||||||
| 112 | Bosnian | ||||||
| 113 | Brazilian_portuguese | ||||||
| 114 | Bulgarian | ||||||
| 115 | Bulgarian_English | ||||||
| 116 | Chines GB code | ||||||
| 117 | Chinese | ||||||
| 118 | Croatian | ||||||
| 119 | Czech | ||||||
| 120 | Danish | ||||||
| 121 | Dutch/English | ||||||
| 122 | English | ||||||
| 123 | English - Hearing Impaired | ||||||
| 124 | English_German | ||||||
| 125 | Estonian | ||||||
| 126 | Finnish | ||||||
| 127 | French | ||||||
| 128 | German - Hearing Impaired | ||||||
| 129 | Germany | ||||||
| 130 | Greek | ||||||
| 131 | Hebrew | ||||||
| 132 | Hungarian/English | ||||||
| 133 | Hungary | ||||||
| 134 | Icelandic | ||||||
| 135 | Italy | ||||||
| 136 | Japanese | ||||||
| 137 | Kalle | ||||||
| 138 | Korean | ||||||
| 139 | |||||||
| 140 | |||||||
| 141 | =head4 debug() | ||||||
| 142 | |||||||
| 143 | WWW-Sucksub-Extratitles can produce a lot of interresting informations | ||||||
| 144 | The default value is "0" : that means that any debug informations will be written | ||||||
| 145 | on the output ( see the logout() method too.) | ||||||
| 146 | |||||||
| 147 | $foo->debug(0) # stop the product of debbugging informations | ||||||
| 148 | $foo->debug(1) # debug info will be written to the log file ( see logout() method) | ||||||
| 149 | |||||||
| 150 | =head4 logout() | ||||||
| 151 | |||||||
| 152 | if you want some debug information : args is 1, else 0 or undef | ||||||
| 153 | |||||||
| 154 | logout => undef; | ||||||
| 155 | |||||||
| 156 | output and optional debugging info will be produced on STDOUT | ||||||
| 157 | or any other descriptor if you give filename as arg. | ||||||
| 158 | |||||||
| 159 | =head4 dbfile() | ||||||
| 160 | |||||||
| 161 | define dbm file for store and retrieving extracted informations | ||||||
| 162 | you must provide a full path to the db file to store results. | ||||||
| 163 | the search() method can not be used without defined dbm file. | ||||||
| 164 | |||||||
| 165 | dbfile('/where/your/db/is.db') | ||||||
| 166 | |||||||
| 167 | The file will should be readable/writable. | ||||||
| 168 | |||||||
| 169 | =head4 html() | ||||||
| 170 | |||||||
| 171 | Define simple html output where to write search report. | ||||||
| 172 | you must provide au full path to the html file if you want to get an html output. | ||||||
| 173 | |||||||
| 174 | html('/where/the html/repport/is/written.html') | ||||||
| 175 | |||||||
| 176 | If $foo->html() is defined. you can get the value of this attribute like this : | ||||||
| 177 | |||||||
| 178 | my $html_page = $foo->html | ||||||
| 179 | |||||||
| 180 | html file will be used for repport with update() and search() methods. | ||||||
| 181 | The html page IS NOT a W3C conform html. It only allows to have a direct access to http links. | ||||||
| 182 | |||||||
| 183 | =head1 METHODS and FUNCTIONS | ||||||
| 184 | |||||||
| 185 | these functions use the precedent attributes value. | ||||||
| 186 | |||||||
| 187 | =head2 search() | ||||||
| 188 | |||||||
| 189 | this function takes no arguments. | ||||||
| 190 | it alows to launch a local dbm search. | ||||||
| 191 | |||||||
| 192 | $foo-> search() | ||||||
| 193 | |||||||
| 194 | the dbm file is read to give you every couple (title,link) which corresponds to | ||||||
| 195 | the motif() pattern. | ||||||
| 196 | |||||||
| 197 | =head2 update() | ||||||
| 198 | |||||||
| 199 | this function takes no arguments. | ||||||
| 200 | it allows to initiate the distant search on the web site Extratitles.com | ||||||
| 201 | the local dbm file is automatically written. Results are accumulated to the dbm file | ||||||
| 202 | you define with the . | ||||||
| 203 | |||||||
| 204 | =head2 get_all_result() | ||||||
| 205 | |||||||
| 206 | return a hash of every couple ( title, http link of subtitle file ) the search or update method returned. | ||||||
| 207 | |||||||
| 208 | my %hash=$foo->get_all_result() | ||||||
| 209 | |||||||
| 210 | |||||||
| 211 | =head1 SEE ALSO | ||||||
| 212 | |||||||
| 213 | =over 4 | ||||||
| 214 | |||||||
| 215 | =item * L |
||||||
| 216 | |||||||
| 217 | =item * L |
||||||
| 218 | |||||||
| 219 | =item * L |
||||||
| 220 | |||||||
| 221 | =item * L |
||||||
| 222 | |||||||
| 223 | =item * L |
||||||
| 224 | |||||||
| 225 | =item * L |
||||||
| 226 | |||||||
| 227 | =item * L |
||||||
| 228 | |||||||
| 229 | =back | ||||||
| 230 | |||||||
| 231 | =head1 AUTHOR | ||||||
| 232 | |||||||
| 233 | Timothée foucart, C<< |
||||||
| 234 | |||||||
| 235 | =head1 BUGS | ||||||
| 236 | |||||||
| 237 | Please report any bugs or feature requests to | ||||||
| 238 | C |
||||||
| 239 | L |
||||||
| 240 | I will be notified, and then you'll automatically be notified of progress on | ||||||
| 241 | your bug as I make changes. | ||||||
| 242 | |||||||
| 243 | =head1 ACKNOWLEDGEMENTS | ||||||
| 244 | |||||||
| 245 | =head1 COPYRIGHT & LICENSE | ||||||
| 246 | |||||||
| 247 | Copyright 2006 Timothée foucart, all rights reserved. | ||||||
| 248 | |||||||
| 249 | This program is free software; you can redistribute it and/or modify it | ||||||
| 250 | under the same terms as Perl itself. | ||||||
| 251 | |||||||
| 252 | =cut | ||||||
| 253 | |||||||
| 254 | 1 | 1 | 26076 | use warnings; | |||
| 1 | 4 | ||||||
| 1 | 30 | ||||||
| 255 | 1 | 1 | 5 | use strict; | |||
| 1 | 1 | ||||||
| 1 | 40 | ||||||
| 256 | require Exporter; | ||||||
| 257 | 1 | 1 | 27 | use vars qw(@ISA @EXPORT $VERSION); | |||
| 1 | 6 | ||||||
| 1 | 80 | ||||||
| 258 | @ISA = qw(Exporter); | ||||||
| 259 | @EXPORT=qw( cookies_file debug dbfile | ||||||
| 260 | get_all_result html logout | ||||||
| 261 | motif search update useragent language ); | ||||||
| 262 | |||||||
| 263 | 1 | 1 | 829 | use utf8; | |||
| 1 | 9 | ||||||
| 1 | 4 | ||||||
| 264 | 1 | 1 | 28 | use Carp; | |||
| 1 | 1 | ||||||
| 1 | 62 | ||||||
| 265 | 1 | 1 | 717 | use HTTP::Cookies; | |||
| 1 | 11876 | ||||||
| 1 | 34 | ||||||
| 266 | 1 | 1 | 1412 | use WWW::Mechanize; | |||
| 1 | 158951 | ||||||
| 1 | 52 | ||||||
| 267 | # | ||||||
| 268 | # | ||||||
| 269 | # -- | ||||||
| 270 | # | ||||||
| 271 | 1 | 1 | 504 | use Alias qw(attr); | |||
| 0 | |||||||
| 0 | |||||||
| 272 | use vars qw( $base $site $cookies_file $useragent $motif $debug $logout $html $dbfile $okdbfile $nbres $totalres %sstsav $fh $language %langhash); | ||||||
| 273 | # | ||||||
| 274 | # global var | ||||||
| 275 | my $fh; | ||||||
| 276 | my %sstsav; | ||||||
| 277 | my %langhash; | ||||||
| 278 | $langhash{'all'}='0'; | ||||||
| 279 | $langhash{'Albanian'}='42'; | ||||||
| 280 | $langhash{'Argentino'}='43'; | ||||||
| 281 | $langhash{'Bosnian'}='44'; | ||||||
| 282 | $langhash{'Brazilian_portuguese'}='13'; | ||||||
| 283 | $langhash{'Bulgarian' }='14'; | ||||||
| 284 | $langhash{'Bulgarian_English' }='15'; | ||||||
| 285 | $langhash{'Chines GB code' }='32'; | ||||||
| 286 | $langhash{'Chinese' }='16'; | ||||||
| 287 | $langhash{'Croatian' }='17'; | ||||||
| 288 | $langhash{'Czech' }='2'; | ||||||
| 289 | $langhash{'Danish' } ='12'; | ||||||
| 290 | $langhash{'Dutch/English' }='33'; | ||||||
| 291 | $langhash{'English' }='1'; | ||||||
| 292 | $langhash{'English - Hearing Impaired' }='34'; | ||||||
| 293 | $langhash{'English_German' }='18'; | ||||||
| 294 | $langhash{'Estonian'}='35'; | ||||||
| 295 | $langhash{'Finnish'}='5'; | ||||||
| 296 | $langhash{'French'}='6'; | ||||||
| 297 | $langhash{'German - Hearing Impaired' }='36'; | ||||||
| 298 | $langhash{'Germany' }='7'; | ||||||
| 299 | $langhash{'Greek' }='19'; | ||||||
| 300 | $langhash{'Hebrew' }='37'; | ||||||
| 301 | $langhash{'Hungarian/English'}='38'; | ||||||
| 302 | $langhash{'Hungary' }='8'; | ||||||
| 303 | $langhash{'Icelandic' }='20'; | ||||||
| 304 | $langhash{'Italy'}='3'; | ||||||
| 305 | $langhash{'Japanese'}='39'; | ||||||
| 306 | $langhash{'Kalle'}='21'; | ||||||
| 307 | $langhash{'Korean'}='22'; | ||||||
| 308 | $langhash{'Latvian'}='40'; | ||||||
| 309 | $langhash{'Lithuanian'}='45'; | ||||||
| 310 | $langhash{'Macedonian'}='41'; | ||||||
| 311 | $langhash{'Netherlands'}='10'; | ||||||
| 312 | $langhash{'Norwegian'}='23'; | ||||||
| 313 | $langhash{'Polish'}='4'; | ||||||
| 314 | $langhash{'Portuguese'}='24'; | ||||||
| 315 | $langhash{'Romanian'}='30'; | ||||||
| 316 | $langhash{'Russian'}='25'; | ||||||
| 317 | $langhash{'Serbian'}='29'; | ||||||
| 318 | $langhash{'Slovak'}='31'; | ||||||
| 319 | $langhash{'Slovenian'}='28'; | ||||||
| 320 | $langhash{'Spanish'}='9'; | ||||||
| 321 | $langhash{'Swedish'}='11'; | ||||||
| 322 | $langhash{'Turkish'}='26'; | ||||||
| 323 | $langhash{'other' }='27'; | ||||||
| 324 | # | ||||||
| 325 | # | ||||||
| 326 | sub new{ | ||||||
| 327 | my $Extratitles=shift; | ||||||
| 328 | my $classe= ref($Extratitles) || $Extratitles; | ||||||
| 329 | my $self={ }; | ||||||
| 330 | bless($self,$classe); | ||||||
| 331 | $self->_init(@_); | ||||||
| 332 | logout($self->{logout}); | ||||||
| 333 | #language($self->{language}); | ||||||
| 334 | return $self; | ||||||
| 335 | }; | ||||||
| 336 | sub _init{ | ||||||
| 337 | my $self= attr shift; | ||||||
| 338 | # | ||||||
| 339 | # -- init default values | ||||||
| 340 | # | ||||||
| 341 | $self->{base} = "http://titles.box.sk/"; | ||||||
| 342 | $self->{site} = "http://titles.box.sk/index.php"; | ||||||
| 343 | $self->{cookies_file} = "$ENV{HOME}"."/.cookies_sksb"; | ||||||
| 344 | $self->{useragent} = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.5) Gecko/20031007"; | ||||||
| 345 | $self->{motif} = undef; | ||||||
| 346 | $self->{debug} = 1; | ||||||
| 347 | $self->{logout} = undef; | ||||||
| 348 | $self->{html} = "$ENV{HOME}"."/Extratitles_report.html"; | ||||||
| 349 | $self->{dbfile} = "$ENV{HOME}"."/Extratitles_db.db"; | ||||||
| 350 | $self->{okdbfile} = 0; | ||||||
| 351 | $self->{sstsav} ={}; | ||||||
| 352 | $self->{language} ='all'; | ||||||
| 353 | # | ||||||
| 354 | # -- replace forced values | ||||||
| 355 | # | ||||||
| 356 | if (@_) | ||||||
| 357 | { | ||||||
| 358 | my %param=@_; | ||||||
| 359 | while (my($x,$y) =each(%param)){$self->{$x}=$y;}; | ||||||
| 360 | } | ||||||
| 361 | return $self; | ||||||
| 362 | }; | ||||||
| 363 | |||||||
| 364 | sub useragent { | ||||||
| 365 | my $self =attr shift; | ||||||
| 366 | if (@_) {$useragent=shift;} | ||||||
| 367 | return $useragent; | ||||||
| 368 | } | ||||||
| 369 | |||||||
| 370 | sub dbfile { | ||||||
| 371 | my $self =attr shift; | ||||||
| 372 | if (@_) {$dbfile=shift;$okdbfile=1}; | ||||||
| 373 | if ($okdbfile==0) {return undef;}; | ||||||
| 374 | return $dbfile; | ||||||
| 375 | } | ||||||
| 376 | sub debug { | ||||||
| 377 | my $self =attr shift; | ||||||
| 378 | if (@_) {$debug=shift;} | ||||||
| 379 | return $debug; | ||||||
| 380 | } | ||||||
| 381 | sub _sstsav { | ||||||
| 382 | my $self =attr shift; | ||||||
| 383 | if (@_) {%sstsav=shift;} | ||||||
| 384 | return %sstsav; | ||||||
| 385 | } | ||||||
| 386 | sub get_all_result { | ||||||
| 387 | my $self =attr shift; | ||||||
| 388 | %sstsav=$self->_sstsav(); | ||||||
| 389 | return %sstsav; | ||||||
| 390 | } | ||||||
| 391 | sub cookies_file { | ||||||
| 392 | my $self =attr shift; | ||||||
| 393 | if (@_) {$cookies_file=shift;} | ||||||
| 394 | return $cookies_file; | ||||||
| 395 | } | ||||||
| 396 | sub motif { | ||||||
| 397 | my $self = attr shift; | ||||||
| 398 | if (@_) {$motif=shift}; | ||||||
| 399 | return $motif; | ||||||
| 400 | } | ||||||
| 401 | sub logout { | ||||||
| 402 | if (@_){$logout=shift; } | ||||||
| 403 | if ($logout) | ||||||
| 404 | { open(FH , ">>", $logout) or croak " can not open $logout : $!\n"; | ||||||
| 405 | $fh=(\*FH);} | ||||||
| 406 | else | ||||||
| 407 | { open (FH, ">&STDOUT" ) or croak "Can't dup STDOUT: $!"; | ||||||
| 408 | $fh=(\*STDOUT);}; | ||||||
| 409 | return $logout; | ||||||
| 410 | } | ||||||
| 411 | sub html { | ||||||
| 412 | my $self =attr shift; | ||||||
| 413 | if (@_) {$html=shift;} | ||||||
| 414 | else {$html=$self;}; | ||||||
| 415 | unless (-e ($html)) | ||||||
| 416 | { | ||||||
| 417 | print $fh "[DEBUG] html report file doesn't exists \n"; | ||||||
| 418 | print $fh "[DEBUG] default value is now : ".$self->{html}." \n"; | ||||||
| 419 | } | ||||||
| 420 | return $html; | ||||||
| 421 | } | ||||||
| 422 | sub _open_html{ | ||||||
| 423 | open(HTMLFILE,">>",$html) | ||||||
| 424 | or croak "can not create $html : $! \n"; | ||||||
| 425 | print HTMLFILE " report generated by suckSub perl module \n"; |
||||||
| 426 | print HTMLFILE "searching : ".motif()." on ".$site." \n"; |
||||||
| 427 | print HTMLFILE " ".localtime()." \n"; |
||||||
| 428 | return; | ||||||
| 429 | } | ||||||
| 430 | sub language{ | ||||||
| 431 | my $self =attr shift; | ||||||
| 432 | if (@_){$language=shift; | ||||||
| 433 | if (defined($langhash{$language})) | ||||||
| 434 | { | ||||||
| 435 | my $lang=$langhash{$language}; | ||||||
| 436 | printf "langage is now set to ".$language." [ ".$lang." ]\n" if ($debug); | ||||||
| 437 | return $lang;} | ||||||
| 438 | else | ||||||
| 439 | { croak "language $language is not recognized !\n";}; | ||||||
| 440 | }; | ||||||
| 441 | }; | ||||||
| 442 | sub update { | ||||||
| 443 | my $self =attr shift; | ||||||
| 444 | unless ($motif){croak "You must provide a string value to motif()....exit\n";}; | ||||||
| 445 | my $mech = WWW::Mechanize->new(agent=>$useragent, | ||||||
| 446 | cookie_jar => HTTP::Cookies->new( | ||||||
| 447 | file => $cookies_file, | ||||||
| 448 | autosave => 1, | ||||||
| 449 | ignore_discard => 0, | ||||||
| 450 | ), | ||||||
| 451 | stack_depth => 1, | ||||||
| 452 | ); | ||||||
| 453 | my $next=0;# next page indicator (0/1) | ||||||
| 454 | |||||||
| 455 | if ($html){_open_html();}; | ||||||
| 456 | print $fh "------------------------------------------------------------------------------------------------\n" if ($debug); | ||||||
| 457 | print $fh "[DEBUG] begin scan on $site at : ".localtime()."\n" if ($debug); | ||||||
| 458 | print $fh "[DEBUG] searching : ".$motif." on $site \n" if ($debug); | ||||||
| 459 | my $page = 1; # pagination | ||||||
| 460 | if ($debug) {print $fh "\n[DEBUG \t Extratitles PAGE $page]\n";}; | ||||||
| 461 | $mech->get('http://titles.box.sk/index.php?p=se&pas=as') or warn "[WARNING] http get problem on : $site !! \n"; | ||||||
| 462 | # launch advanced search research | ||||||
| 463 | $mech->form_name('as_form'); | ||||||
| 464 | $mech->select( 'jaz' , $langhash{$self->{language}} );#i.e. langage = french | ||||||
| 465 | $mech->field( 'z3' , $motif,1 ); | ||||||
| 466 | $mech->field( 'p', 'se',1 ); | ||||||
| 467 | $mech->click(); | ||||||
| 468 | if ($debug) { print $fh "[DEBUG \t GET URL \t : \t ".$mech->uri()."]\n" if ($debug);}; | ||||||
| 469 | # so we parse all result page one by one | ||||||
| 470 | ($nbres,$next) = _parse_Extratitles($mech,$page); | ||||||
| 471 | printf $fh "[DEBUG] next page detected \n" if $debug; | ||||||
| 472 | $totalres=$nbres; | ||||||
| 473 | # | ||||||
| 474 | # verify if we need to change page to get next search results | ||||||
| 475 | # | ||||||
| 476 | while ($next>0) | ||||||
| 477 | { | ||||||
| 478 | $page = $page+1; | ||||||
| 479 | #http://titles.box.sk/index.php?pid=subt2&p=se&bp=40&bn=0&z3=e&jaz=6 | ||||||
| 480 | # | | | \langage | ||||||
| 481 | # | | \z3=search motif | ||||||
| 482 | # | \display range begin | ||||||
| 483 | # \number of subtitles to display | ||||||
| 484 | my $nbdisp=$page*20; | ||||||
| 485 | $mech->get( "http://titles.box.sk/index.php?pid=subt2&p=se&bp=20&bn=".$nbdisp."&z3=".$motif."&jaz=".$langhash{$self->{language}}) | ||||||
| 486 | or warn "get problem on page : $page : $! \n"; | ||||||
| 487 | if ($debug) { print $fh "[DEBUG \t PAGE : $page]\n";print $fh "[DEBUG \t GET URL \t : \t ".$mech->uri() ."]\n";}; | ||||||
| 488 | ($nbres,$next) = _parse_Extratitles($mech,$page); | ||||||
| 489 | $totalres=$totalres+$nbres; | ||||||
| 490 | }; | ||||||
| 491 | |||||||
| 492 | # | ||||||
| 493 | print $fh "[DEBUG \t : $totalres found on $base]\n" if ($debug); | ||||||
| 494 | print $fh "[END]\n" if ($debug); | ||||||
| 495 | #print html report | ||||||
| 496 | if ($html) | ||||||
| 497 | { | ||||||
| 498 | $nbres=0; | ||||||
| 499 | while (my ($k,$v) =each(%sstsav)) | ||||||
| 500 | { | ||||||
| 501 | print HTMLFILE "".$v." \n"; |
||||||
| 502 | $nbres++; | ||||||
| 503 | } | ||||||
| 504 | |||||||
| 505 | } | ||||||
| 506 | #finish and close all open file(s) | ||||||
| 507 | if ($html) | ||||||
| 508 | { | ||||||
| 509 | print HTMLFILE " ".$nbres." result(s) found \n"; |
||||||
| 510 | print HTMLFILE " report finished at ".localtime()." \n"; |
||||||
| 511 | } | ||||||
| 512 | |||||||
| 513 | |||||||
| 514 | close HTMLFILE; | ||||||
| 515 | return; | ||||||
| 516 | }; | ||||||
| 517 | |||||||
| 518 | # | ||||||
| 519 | # ---local search if $dbfile exist | ||||||
| 520 | # | ||||||
| 521 | sub search { | ||||||
| 522 | my $self =attr shift; | ||||||
| 523 | unless ($motif){croak "You must provide a string value to motif() attribute....exit\n";}; | ||||||
| 524 | unless ($dbfile) { croak " no DB file defined : exit ... \n";}; | ||||||
| 525 | #html report | ||||||
| 526 | if ($html) | ||||||
| 527 | { | ||||||
| 528 | open(HTMLFILE,">>",$html) | ||||||
| 529 | or croak "can not create $html : $! \n"; | ||||||
| 530 | print HTMLFILE " local search on dm file : $dbfile \n"; |
||||||
| 531 | print HTMLFILE "searching : ".$motif." on ".$site." \n"; |
||||||
| 532 | print HTMLFILE " ".localtime()." \n"; |
||||||
| 533 | }; | ||||||
| 534 | #print html report | ||||||
| 535 | #local search --> print and finish html report | ||||||
| 536 | _search_dbm($dbfile); | ||||||
| 537 | return; | ||||||
| 538 | |||||||
| 539 | }; | ||||||
| 540 | # | ||||||
| 541 | #--- this function = to parse only one result page | ||||||
| 542 | # | ||||||
| 543 | sub _parse_Extratitles{ | ||||||
| 544 | my $mech=$_[0];my $page =$_[1]; | ||||||
| 545 | my $jnd=0; my $jnd2=0; | ||||||
| 546 | my $oktitle=0;my $okurl=0; | ||||||
| 547 | my $next_page_exists=0; | ||||||
| 548 | my $f_url; my $f_title; | ||||||
| 549 | my $lnk=$mech->find_all_links(); | ||||||
| 550 | my $nbl = $#{$lnk}; my $ind=0; | ||||||
| 551 | print $fh "[DEBUG] searching links on : ".$mech->uri()." ]\n" if ($debug); | ||||||
| 552 | |||||||
| 553 | # =4= rechercher les liens des reponses de la recherche | ||||||
| 554 | my @sstlist=[];my @ssturl=[];# memo array | ||||||
| 555 | for ( my $ind=0; $ind < $#{$lnk} ; $ind++) | ||||||
| 556 | { | ||||||
| 557 | # search and memorize the subtitle label | ||||||
| 558 | # can be fixed if site changes | ||||||
| 559 | # --Title links should have these syntax : | ||||||
| 560 | # --http://titles.box.sk/index.php?pid=subt2&p=i&rid= 207488 | ||||||
| 561 | # --Subtitle file must have an url text = "DOWNLOAD" | ||||||
| 562 | #search lovie name | ||||||
| 563 | if ( ($lnk->[$ind]->url() =~ m/(^?pid=subt2\&p=i\&rid=)([0-9]+$)/g ) | ||||||
| 564 | and ($lnk->[$ind]->text()!~m/MORE INFO/) | ||||||
| 565 | ) | ||||||
| 566 | { | ||||||
| 567 | push @sstlist,$lnk->[$ind]->text(); | ||||||
| 568 | print $fh "[FOUND MOVIE NAME]\n\t".$lnk->[$ind]->text()."\n" if $debug; | ||||||
| 569 | $f_title=scalar($lnk->[$ind]->text()); | ||||||
| 570 | $oktitle=1; | ||||||
| 571 | }; | ||||||
| 572 | # search subtitle url to download | ||||||
| 573 | # test if text() is f=defined avoid warning | ||||||
| 574 | # then if found, we can save in sstsav hash | ||||||
| 575 | if ( (defined($lnk->[$ind]->text()) and ($lnk->[$ind]->text() =~m/(\DOWNLOAD)/) ) | ||||||
| 576 | ) | ||||||
| 577 | { | ||||||
| 578 | push @ssturl,$lnk->[$ind]->url_abs(); | ||||||
| 579 | print $fh "[FOUND SUBTITLE LINK]\n\t". scalar($lnk->[$ind]->url_abs()) ."\n" if $debug; | ||||||
| 580 | $f_url=scalar($lnk->[$ind]->url_abs()); | ||||||
| 581 | $okurl=1; | ||||||
| 582 | if ($oktitle==1) | ||||||
| 583 | { | ||||||
| 584 | $sstsav{$f_url}=$f_title; | ||||||
| 585 | $oktitle=0;$okurl=0; | ||||||
| 586 | } | ||||||
| 587 | }; | ||||||
| 588 | if (defined($lnk->[$ind]->text()) and ($lnk->[$ind]->text() =~/>/) ) | ||||||
| 589 | {$next_page_exists=1;} | ||||||
| 590 | }; | ||||||
| 591 | # verify we get any result for the search request | ||||||
| 592 | if ( $#ssturl < 1) { print $fh " PAS DE RESULTAT pour $motif sur Extratitles\n";return (0,0);}; | ||||||
| 593 | #else save and print if $html | ||||||
| 594 | print $fh "[DEBUG] Found ". $#ssturl ." subtitles on page : ".$page."\n" if ($debug); | ||||||
| 595 | $nbres=$#ssturl; | ||||||
| 596 | _save_dbm(); | ||||||
| 597 | #and reinit sstsav | ||||||
| 598 | %sstsav={}; | ||||||
| 599 | return ($nbres,$next_page_exists); | ||||||
| 600 | }; | ||||||
| 601 | sub _save_dbm{ | ||||||
| 602 | my %xstsav; | ||||||
| 603 | use DB_File; | ||||||
| 604 | tie (%xstsav,'DB_File',$dbfile ) | ||||||
| 605 | or croak "can not use $dbfile : $!\n"; | ||||||
| 606 | while (my ($k, $v) = each %sstsav) | ||||||
| 607 | { $xstsav{$k}=$v; print $fh "[DEBUG][DBM] saving $v [$k] into db \n" if ($debug);}; | ||||||
| 608 | untie(%xstsav); | ||||||
| 609 | return; | ||||||
| 610 | }; | ||||||
| 611 | sub _search_dbm{ | ||||||
| 612 | use DB_File; | ||||||
| 613 | my %hashread; | ||||||
| 614 | my $nb_local_res; | ||||||
| 615 | unless (-e ($dbfile)) | ||||||
| 616 | {croak "[DEBUG SEARCH] db file ".$dbfile." not found ! \n";}; | ||||||
| 617 | tie(%hashread,'DB_File',$dbfile) | ||||||
| 618 | or croak "can not access : $dbfile : $!\n"; | ||||||
| 619 | if ($html) | ||||||
| 620 | { | ||||||
| 621 | print HTMLFILE " Searching : ".$motif." on local database : \n"; |
||||||
| 622 | print HTMLFILE " DBM file is :".$dbfile." \n"; |
||||||
| 623 | } | ||||||
| 624 | while (my ($k,$v)=each(%hashread)) | ||||||
| 625 | { | ||||||
| 626 | if ($v =~ m/$motif/i) | ||||||
| 627 | { | ||||||
| 628 | print $fh "[FOUND Libelle ] $v \n[FOUND LINK]". $base.$k ."\n" if $debug; | ||||||
| 629 | if ($html) | ||||||
| 630 | { | ||||||
| 631 | my $url=$k; | ||||||
| 632 | if ($k !~ m/http:\/\//im){my $url=$base.$k} | ||||||
| 633 | print HTMLFILE "".$v." \n"; |
||||||
| 634 | $nb_local_res++; | ||||||
| 635 | }; | ||||||
| 636 | }; | ||||||
| 637 | }; | ||||||
| 638 | untie(%hashread); | ||||||
| 639 | if ($html) | ||||||
| 640 | { | ||||||
| 641 | print HTMLFILE " [ ".$nb_local_res." result(s) found on local DB ] \n"; |
||||||
| 642 | print HTMLFILE " report finished at ".localtime()." \n"; |
||||||
| 643 | } | ||||||
| 644 | return; | ||||||
| 645 | }; | ||||||
| 646 | sub END{ | ||||||
| 647 | my $self =attr shift; | ||||||
| 648 | close HTMLFILE;close FH; | ||||||
| 649 | return; | ||||||
| 650 | }; | ||||||
| 651 | |||||||
| 652 | 1; # End of WWW-Sucksub::Extratitles |