| blib/lib/WWW/Sucksub/Frigo.pm | |||
|---|---|---|---|
| Criterion | Covered | Total | % |
| statement | 22 | 24 | 91.6 |
| branch | n/a | ||
| condition | n/a | ||
| subroutine | 8 | 8 | 100.0 |
| pod | n/a | ||
| total | 30 | 32 | 93.7 |
| line | stmt | bran | cond | sub | pod | time | code |
|---|---|---|---|---|---|---|---|
| 1 | package WWW::Sucksub::Frigo; | ||||||
| 2 | =head1 NAME | ||||||
| 3 | |||||||
| 4 | WWW::Sucksub::Frigo - Automated access to frigorifix subtibles database | ||||||
| 5 | |||||||
| 6 | =head1 VERSION | ||||||
| 7 | |||||||
| 8 | Version 0.03 | ||||||
| 9 | |||||||
| 10 | =cut | ||||||
| 11 | |||||||
| 12 | our $VERSION = '0.03'; | ||||||
| 13 | |||||||
| 14 | =head1 SYNOPSIS | ||||||
| 15 | |||||||
| 16 | WWW::SuckSub::Frigo is a web robot based on the WWW::Mechanize Module | ||||||
| 17 | This module search and collect distant result on the frigorifix.com web database. | ||||||
| 18 | Subtitles Files urls and associated titles are stored in a dbm file. | ||||||
| 19 | Distant and local subtitles search are possible. Accessing to the local database thru simple html generated repport. | ||||||
| 20 | |||||||
| 21 | |||||||
| 22 | |||||||
| 23 | use WWW::Sucksub::Frigo; | ||||||
| 24 | my $foo = WWW::Sucksub::Frigo->new( | ||||||
| 25 | dbfile=> '/where/your/DBM/file is.db', | ||||||
| 26 | html =>'/where/your/html/repport/is.html', | ||||||
| 27 | motif=> 'the word(s) you search', | ||||||
| 28 | debug=> 1, | ||||||
| 29 | logout => '/where/your/debug/info/are/written.log', | ||||||
| 30 | ); ); | ||||||
| 31 | $foo->search(); # collect all link corresponding to the $foo->motif() | ||||||
| 32 | $foo->motif('x'); # modify the search criteria | ||||||
| 33 | $foo->searchdbm(); # launch a search only on the local database | ||||||
| 34 | |||||||
| 35 | Html report should be generated at the end of search() and searchdbm(). | ||||||
| 36 | |||||||
| 37 | =head1 CONSTRUCTOR AND STARTUP | ||||||
| 38 | |||||||
| 39 | =head2 Frigo Constructor | ||||||
| 40 | |||||||
| 41 | The new() constructor, is associated to default values : | ||||||
| 42 | you can modify these one as shown in the synopsis example. | ||||||
| 43 | initial values you can modify are these : | ||||||
| 44 | |||||||
| 45 | my $foo = WWW::Sucksub::Frigo->new( | ||||||
| 46 | html=> "$ENV{HOME}"."/frigorifix_report.html", | ||||||
| 47 | dbfile => "$ENV{HOME}"."/frigorifix_db.db", | ||||||
| 48 | motif=> undef, | ||||||
| 49 | tmpfile_frigo = > "$ENV{HOME}"."/tmp_frigo.html", | ||||||
| 50 | debug=> 0, | ||||||
| 51 | usedbm=>0, | ||||||
| 52 | username_frigo=>"", | ||||||
| 53 | password_frigo=>"", | ||||||
| 54 | logout => \*STDOUT | ||||||
| 55 | useragent=> "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.5) Gecko/20031007" | ||||||
| 56 | ); | ||||||
| 57 | See _init() internal function for more details. | ||||||
| 58 | |||||||
| 59 | =head3 new() constructor attributes and associated methods | ||||||
| 60 | |||||||
| 61 | Few attributes can be set thru new() attributes. | ||||||
| 62 | All attributes can be modified by corresponding methods: | ||||||
| 63 | |||||||
| 64 | $foo->WWW::Sucksub::Frigo->new() | ||||||
| 65 | $foo->useragent() # get the useragent attribute value | ||||||
| 66 | $foo->useragent('tructruc') # set the useragent attribute value to 'tructruc' | ||||||
| 67 | |||||||
| 68 | |||||||
| 69 | =head4 cookies_file() | ||||||
| 70 | |||||||
| 71 | arg must be a file, this default value can be modified by calling the | ||||||
| 72 | |||||||
| 73 | $foo->cookies_file('/where/my/cookies/are.txt') | ||||||
| 74 | |||||||
| 75 | modify the default value positionned by the new constructor. | ||||||
| 76 | |||||||
| 77 | $foo->cookies_file() | ||||||
| 78 | |||||||
| 79 | return the actual value of the cookies file path. | ||||||
| 80 | |||||||
| 81 | |||||||
| 82 | =head4 usedbm(0/1) | ||||||
| 83 | |||||||
| 84 | Default value is 0. In this case, calling search() method won't affect the dbm file. | ||||||
| 85 | You need to set the value 1 to activate dbm update | ||||||
| 86 | |||||||
| 87 | $foo->usedbm(1) | ||||||
| 88 | |||||||
| 89 | This method will fails if no dbm file ( see dbfile() ) has been defined. | ||||||
| 90 | |||||||
| 91 | |||||||
| 92 | =head4 useragent() | ||||||
| 93 | |||||||
| 94 | arg should be a valid useragent. There's no reason to change this default value. | ||||||
| 95 | |||||||
| 96 | $foo->useragent() | ||||||
| 97 | |||||||
| 98 | return the value of the current useragent. | ||||||
| 99 | |||||||
| 100 | =head4 tmpfile_frigo() | ||||||
| 101 | |||||||
| 102 | Frigo.pm needs to write temporary file. | ||||||
| 103 | the tmpfile_frigo() method allows you to change path of this temporary file : | ||||||
| 104 | |||||||
| 105 | $foo=WWW::Sucksub::Frigo->new( | ||||||
| 106 | ... | ||||||
| 107 | tmpfile_frigo => '/where/tmp/file/is/written.html', | ||||||
| 108 | ... | ||||||
| 109 | ); | ||||||
| 110 | |||||||
| 111 | To retrieve temporary file path : | ||||||
| 112 | |||||||
| 113 | my $tmp=$foo->tmpfile_frigo(); | ||||||
| 114 | |||||||
| 115 | To change temporary file path: | ||||||
| 116 | |||||||
| 117 | $foo->tmpfile_frigo('/where/my/new/tmp/file/is/written.html'); | ||||||
| 118 | |||||||
| 119 | |||||||
| 120 | =head4 motif() | ||||||
| 121 | |||||||
| 122 | you should here give a real value to this function : | ||||||
| 123 | if $foo->motif stays undef, the package execution will be aborted | ||||||
| 124 | |||||||
| 125 | $foo->motif('xxx') | ||||||
| 126 | |||||||
| 127 | allows to precise that you're searching a word that contains 'xxx' | ||||||
| 128 | |||||||
| 129 | $foo->motif() | ||||||
| 130 | |||||||
| 131 | return the current value of the string you search. | ||||||
| 132 | |||||||
| 133 | =head4 debug() | ||||||
| 134 | |||||||
| 135 | WWW-Sucksub-Frigo can produce a lot of interresting informations | ||||||
| 136 | The default value is "0" : that means that any debug informations will be written | ||||||
| 137 | on the output ( see the logout() method too.) | ||||||
| 138 | |||||||
| 139 | $foo->debug(0) # stop the product of debbugging informations | ||||||
| 140 | $foo->debug(1) # debug info will be written to the log file ( see logout() method) | ||||||
| 141 | |||||||
| 142 | =head4 logout() | ||||||
| 143 | |||||||
| 144 | if you want some debug informations, you should set the debug attribute to 1 | ||||||
| 145 | See debug() method for more precisions. | ||||||
| 146 | logout() method is associated to the debug() attribute value. | ||||||
| 147 | It indicates path where debug info will be written. | ||||||
| 148 | Default value is : | ||||||
| 149 | |||||||
| 150 | $foo=WWW::Sucksub::Frigo->new( | ||||||
| 151 | ... | ||||||
| 152 | logout => \*STDOUT, | ||||||
| 153 | ..., | ||||||
| 154 | ) | ||||||
| 155 | |||||||
| 156 | output and optional debugging info will be produced ont STDOUT | ||||||
| 157 | or any other descriptor if you give filename as arg, by example : | ||||||
| 158 | |||||||
| 159 | $foo=WWW::Sucksub::Frigo->new( | ||||||
| 160 | ... | ||||||
| 161 | logout => '/where/my/log/is/written.txt', | ||||||
| 162 | ..., | ||||||
| 163 | ) | ||||||
| 164 | |||||||
| 165 | =head4 dbfile() | ||||||
| 166 | |||||||
| 167 | define dbm file for store and retrieving extracted informations | ||||||
| 168 | you must provide a full path to the db file to store results. | ||||||
| 169 | the search() method can not be used without defined dbm file. | ||||||
| 170 | |||||||
| 171 | $foo->dbfile('/where/your/db/is.db') | ||||||
| 172 | |||||||
| 173 | The file will should be readable/writable. | ||||||
| 174 | |||||||
| 175 | =head4 html() | ||||||
| 176 | |||||||
| 177 | Define simple html output where to write search report. | ||||||
| 178 | you must provide au full path to the html file if you want to get an html output. | ||||||
| 179 | |||||||
| 180 | $foo->html('/where/the html/repport/is/written.html') | ||||||
| 181 | |||||||
| 182 | If $foo->html() is defined. you can get the value of this attribute like this : | ||||||
| 183 | |||||||
| 184 | my $html_page = $foo->html() | ||||||
| 185 | |||||||
| 186 | html file will be used for report with search and searchdbm() methods. | ||||||
| 187 | |||||||
| 188 | =head4 username_frigo() | ||||||
| 189 | |||||||
| 190 | Allow you to login and obtain cookies from frigorifix web site | ||||||
| 191 | |||||||
| 192 | $foo->username_frigo('my_login') | ||||||
| 193 | |||||||
| 194 | Default value is empty. there's no obligation to fill it. | ||||||
| 195 | Otherwise, you should fill password_frigo() too. | ||||||
| 196 | |||||||
| 197 | =head4 password_frigo() | ||||||
| 198 | |||||||
| 199 | Allow you to login and obtain cookies from frigorifix web site | ||||||
| 200 | |||||||
| 201 | $foo->password_frigo('my_password') | ||||||
| 202 | |||||||
| 203 | Default value is empty. there's no obligations to fill it. | ||||||
| 204 | Otherwise, you should fill username_frigo() too. | ||||||
| 205 | |||||||
| 206 | |||||||
| 207 | |||||||
| 208 | =head1 METHODS and FUNCTIONS | ||||||
| 209 | |||||||
| 210 | these functions use the precedent attributes value. | ||||||
| 211 | |||||||
| 212 | =head2 search() | ||||||
| 213 | |||||||
| 214 | this function takes no arguments. | ||||||
| 215 | it alows to launch a local dbm search. | ||||||
| 216 | |||||||
| 217 | $foo-> search() | ||||||
| 218 | |||||||
| 219 | the dbm file is read to give you every couple (title,link) which corresponds to | ||||||
| 220 | the motif() pattern. | ||||||
| 221 | |||||||
| 222 | =head2 searchdbm() | ||||||
| 223 | |||||||
| 224 | this function takes no arguments. | ||||||
| 225 | it allows to initiate the distant search on the web site frigorifix | ||||||
| 226 | the local dbm file is automatically written. Results are accumulated to the dbm file | ||||||
| 227 | you defined. | ||||||
| 228 | a search pattern must be define thru motif() method before launching a dbm search. | ||||||
| 229 | |||||||
| 230 | =head2 get_all_result() | ||||||
| 231 | |||||||
| 232 | return a hash of every couple ( title, http link of subtitle file ) the search or update method returned. | ||||||
| 233 | |||||||
| 234 | my %hash=$foo->get_all_result() | ||||||
| 235 | |||||||
| 236 | |||||||
| 237 | =head1 SEE ALSO | ||||||
| 238 | |||||||
| 239 | =over 4 | ||||||
| 240 | |||||||
| 241 | =item * L |
||||||
| 242 | |||||||
| 243 | =item * L |
||||||
| 244 | |||||||
| 245 | =item * L |
||||||
| 246 | |||||||
| 247 | =item * L |
||||||
| 248 | |||||||
| 249 | =item * L |
||||||
| 250 | |||||||
| 251 | =item * L |
||||||
| 252 | |||||||
| 253 | =back | ||||||
| 254 | |||||||
| 255 | =head1 AUTHOR | ||||||
| 256 | |||||||
| 257 | Timothée foucart, C<< |
||||||
| 258 | |||||||
| 259 | =head1 BUGS | ||||||
| 260 | |||||||
| 261 | Please report any bugs or feature requests to | ||||||
| 262 | C |
||||||
| 263 | L |
||||||
| 264 | I will be notified, and then you'll automatically be notified of progress on | ||||||
| 265 | your bug as I make changes. | ||||||
| 266 | |||||||
| 267 | =cut | ||||||
| 268 | |||||||
| 269 | 1 | 1 | 21417 | use vars qw(@ISA @EXPORT $VERSION); | |||
| 1 | 3 | ||||||
| 1 | 101 | ||||||
| 270 | @ISA = qw(Exporter); | ||||||
| 271 | @EXPORT=qw( debug dbfile tmpfile_frigo | ||||||
| 272 | get_all_result html logout | ||||||
| 273 | motif search searchdbm useragent | ||||||
| 274 | usedbm username_frigo password_frigo ); | ||||||
| 275 | |||||||
| 276 | |||||||
| 277 | 1 | 1 | 7 | use warnings; | |||
| 1 | 2 | ||||||
| 1 | 37 | ||||||
| 278 | 1 | 1 | 5 | use strict; | |||
| 1 | 7 | ||||||
| 1 | 40 | ||||||
| 279 | 1 | 1 | 6 | use Carp; | |||
| 1 | 2 | ||||||
| 1 | 109 | ||||||
| 280 | 1 | 1 | 1080 | use HTML::Form; | |||
| 1 | 37206 | ||||||
| 1 | 35 | ||||||
| 281 | 1 | 1 | 954 | use HTTP::Cookies; | |||
| 1 | 13934 | ||||||
| 1 | 31 | ||||||
| 282 | 1 | 1 | 1500 | use WWW::Mechanize; | |||
| 1 | 174102 | ||||||
| 1 | 46 | ||||||
| 283 | # | ||||||
| 284 | 1 | 1 | 511 | use Alias qw(attr); | |||
| 0 | |||||||
| 0 | |||||||
| 285 | use vars qw($cookies_file $site $nbres | ||||||
| 286 | $base $debug $useragent $motif | ||||||
| 287 | %sstsav $username_frigo $password_frigo | ||||||
| 288 | $logout $srchadr $fh $loginpage $html | ||||||
| 289 | $usedbm $dbfile | ||||||
| 290 | $mech ); | ||||||
| 291 | # | ||||||
| 292 | sub new{ | ||||||
| 293 | my $frigo=shift; | ||||||
| 294 | my $classe= ref($frigo) || $frigo; | ||||||
| 295 | my $self={ }; | ||||||
| 296 | bless($self,$classe); | ||||||
| 297 | $self->_init(@_); | ||||||
| 298 | logout($self->{logout}); | ||||||
| 299 | return $self; | ||||||
| 300 | }; | ||||||
| 301 | # | ||||||
| 302 | sub _init{ | ||||||
| 303 | # init du hachage pour l'objet | ||||||
| 304 | my $self= attr shift; | ||||||
| 305 | $self->{base} = "http://v2.frigorifix.com/"; | ||||||
| 306 | $self->{site}= "http://v2.frigorifix.com/index.php"; | ||||||
| 307 | $self->{loginpage}= "http://v2.frigorifix.com/index.php?action=login"; | ||||||
| 308 | $self->{cookies_file}="$ENV{HOME}"."/.cookies_frigo"; | ||||||
| 309 | $self->{tmpfile_frigo}="$ENV{HOME}"."/.tmp_frigo.html"; | ||||||
| 310 | $self->{useragent}= "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.5) Gecko/20031007"; | ||||||
| 311 | $self->{username_frigo}=""; | ||||||
| 312 | $self->{password_frigo}=""; | ||||||
| 313 | $self->{srchadr}="http://v2.frigorifix.com/index.php?action=static&staticpage=9"; | ||||||
| 314 | $self->{motif}= undef; | ||||||
| 315 | $self->{debug}= 0; | ||||||
| 316 | $self->{logout}=\*STDOUT; | ||||||
| 317 | $self->{nbres}= 0; | ||||||
| 318 | $self->{html} = "$ENV{HOME}"."/frigorifix_report.html"; | ||||||
| 319 | $self->{dbfile} = "$ENV{HOME}"."/frigorifix_db.db"; | ||||||
| 320 | $self->{usedbm} = 0; | ||||||
| 321 | $self->{sstsav}={}; | ||||||
| 322 | # | ||||||
| 323 | # -- replace forced values | ||||||
| 324 | # | ||||||
| 325 | if (@_) | ||||||
| 326 | { | ||||||
| 327 | my %param=@_; | ||||||
| 328 | while (my($x,$y) =each(%param)){$self->{$x}=$y;}; | ||||||
| 329 | } | ||||||
| 330 | return $self; | ||||||
| 331 | }; | ||||||
| 332 | # | ||||||
| 333 | sub usedbm { | ||||||
| 334 | my $self =attr shift; | ||||||
| 335 | if (@_) {$usedbm=shift;} | ||||||
| 336 | croak " can not do a local search without motif !!\n you must provide a string thru motif() method before \n" unless $motif; | ||||||
| 337 | if ($usedbm>0){searchdbm();}; | ||||||
| 338 | return $dbfile; | ||||||
| 339 | } | ||||||
| 340 | sub dbfile { | ||||||
| 341 | my $self =attr shift; | ||||||
| 342 | if (@_) {$dbfile=shift;}; | ||||||
| 343 | return $dbfile; | ||||||
| 344 | } | ||||||
| 345 | sub useragent { | ||||||
| 346 | my $self =attr shift; | ||||||
| 347 | if (@_) {$useragent=shift;} | ||||||
| 348 | return $useragent; | ||||||
| 349 | } | ||||||
| 350 | sub username_frigo { | ||||||
| 351 | my $self =attr shift; | ||||||
| 352 | if (@_) {$username_frigo=shift;} | ||||||
| 353 | return $username_frigo; | ||||||
| 354 | } | ||||||
| 355 | sub password_frigo { | ||||||
| 356 | my $self =attr shift; | ||||||
| 357 | if (@_) {$password_frigo=shift;} | ||||||
| 358 | return $password_frigo; | ||||||
| 359 | } | ||||||
| 360 | sub nbres { | ||||||
| 361 | my $self =attr shift; | ||||||
| 362 | if (@_) {$nbres=shift;} | ||||||
| 363 | #print $fh " $nbres : sous-titres touves \n"; | ||||||
| 364 | return $nbres; | ||||||
| 365 | } | ||||||
| 366 | sub srchadr { | ||||||
| 367 | my $self =attr shift; | ||||||
| 368 | if (@_) {$srchadr=shift;} | ||||||
| 369 | return $srchadr; | ||||||
| 370 | } | ||||||
| 371 | sub logout { | ||||||
| 372 | if (@_){$logout=shift; } | ||||||
| 373 | if ($logout) | ||||||
| 374 | { open(FH , ">>", $logout) or die "$logout : $!\n"; | ||||||
| 375 | $fh=(\*FH);} | ||||||
| 376 | else | ||||||
| 377 | { $fh=(\*STDOUT);}; | ||||||
| 378 | return $logout; | ||||||
| 379 | } | ||||||
| 380 | sub debug { | ||||||
| 381 | my $self =attr shift; | ||||||
| 382 | if (@_) {$debug=shift;} | ||||||
| 383 | return $debug; | ||||||
| 384 | } | ||||||
| 385 | sub get_all_result { # alias for sstsav | ||||||
| 386 | my $self =attr shift; | ||||||
| 387 | if (!($self->sstsav())){ return undef} | ||||||
| 388 | else {return %sstsav;}; | ||||||
| 389 | } | ||||||
| 390 | sub sstsav { | ||||||
| 391 | my $self =attr shift; | ||||||
| 392 | if (%sstsav){return %sstsav;} | ||||||
| 393 | else {return undef;}; | ||||||
| 394 | } | ||||||
| 395 | sub html { | ||||||
| 396 | my $self =attr shift; | ||||||
| 397 | @_?$html=shift:$html=$self; | ||||||
| 398 | unless (-e ($html)) | ||||||
| 399 | { | ||||||
| 400 | print $fh "[DEBUG] html report file doesn't exists \n"; | ||||||
| 401 | print $fh "[DEBUG] Sucksub will create one ... \n"; | ||||||
| 402 | } | ||||||
| 403 | return $html; | ||||||
| 404 | } | ||||||
| 405 | sub cookies_file { | ||||||
| 406 | my $self =attr shift; | ||||||
| 407 | if (@_) {$cookies_file=shift;} | ||||||
| 408 | return $cookies_file; | ||||||
| 409 | } | ||||||
| 410 | |||||||
| 411 | sub loginpage { | ||||||
| 412 | my $self =attr shift; | ||||||
| 413 | if (@_) {$loginpage=shift;} | ||||||
| 414 | return $loginpage; | ||||||
| 415 | } | ||||||
| 416 | sub motif { | ||||||
| 417 | my $self =attr shift; | ||||||
| 418 | if (@_) {$motif=shift;} | ||||||
| 419 | return $motif; | ||||||
| 420 | } | ||||||
| 421 | sub search{ | ||||||
| 422 | my $self =attr shift; | ||||||
| 423 | return unless $motif; | ||||||
| 424 | our $mech = WWW::Mechanize->new(stack_depth => 1, | ||||||
| 425 | agent=>$useragent, | ||||||
| 426 | cookie_jar => HTTP::Cookies->new( | ||||||
| 427 | file => $cookies_file, | ||||||
| 428 | autosave => 1, | ||||||
| 429 | ignore_discard => 0), | ||||||
| 430 | ); | ||||||
| 431 | # login to frigorifix v2 and obtain cookie | ||||||
| 432 | unless ( -e ($cookies_file) ) | ||||||
| 433 | { | ||||||
| 434 | $mech->get($loginpage) or croak "[WARNING] can not get $base ! : $! \n"; | ||||||
| 435 | $mech->form_number(3); | ||||||
| 436 | $mech->set_fields( 'user' => $username_frigo); | ||||||
| 437 | $mech->set_fields( 'passwrd' => $password_frigo); | ||||||
| 438 | $mech->set_fields( 'cookieneverexp' => 'on' ); | ||||||
| 439 | $mech->click(); | ||||||
| 440 | }; | ||||||
| 441 | print $fh "--------------------------------------------------------------------------------\n" if ($debug); | ||||||
| 442 | print $fh "[DEBUG] begin scan on $site at : ".localtime()."\n" if ($debug); | ||||||
| 443 | print $fh "[DEBUG] searching : ".$motif." on $site \n" if ($debug); | ||||||
| 444 | print $fh "--------------------------------------------------------------------------------\n" if ($debug); | ||||||
| 445 | if ($html) | ||||||
| 446 | { | ||||||
| 447 | open (HTML,">>",$html) or warn "can not access $html : $! \n"; | ||||||
| 448 | print HTML " HTML report generated by suckSub perl module \n"; |
||||||
| 449 | print HTML "searching : ".$motif." on ".$site." at ".localtime()." \n"; |
||||||
| 450 | }; | ||||||
| 451 | #----main search process call------------------- | ||||||
| 452 | $nbres=_search_frigorifix(); | ||||||
| 453 | #----------------------------------------------- | ||||||
| 454 | |||||||
| 455 | print $fh "[DEBUG] ".$nbres." results found \n" if ($debug); | ||||||
| 456 | print $fh "--------------------------------------------------------------------------------\n" if ($debug); | ||||||
| 457 | if ($html) | ||||||
| 458 | { | ||||||
| 459 | print HTML " ".$nbres." result(s) found \n"; |
||||||
| 460 | print HTML "Html report finished at ".localtime()." \n"; |
||||||
| 461 | }; | ||||||
| 462 | if ($usedbm) | ||||||
| 463 | { | ||||||
| 464 | _savedbm(); | ||||||
| 465 | } | ||||||
| 466 | return $nbres; | ||||||
| 467 | }; | ||||||
| 468 | sub _search_frigorifix{ | ||||||
| 469 | my $self =attr shift; | ||||||
| 470 | my $jnd=0;my @sstlist=();my @sstlib=(); | ||||||
| 471 | $mech->get($srchadr) or croak "[WARNING] can not get $site ! : $! \n"; | ||||||
| 472 | #collect and links which text is in search phrase | ||||||
| 473 | my $lnk1=$mech->find_all_links(); | ||||||
| 474 | for ( my $ind=0; $ind <= $#{$lnk1} ; $ind++) | ||||||
| 475 | { | ||||||
| 476 | if ( ($lnk1->[$ind]->text() =~ m/$motif/i ) | ||||||
| 477 | and ( $lnk1->[$ind]->url_abs() =~m/v2\.frigorifix\.com/) ) | ||||||
| 478 | { | ||||||
| 479 | push @sstlist,$lnk1->[$ind]->url_abs(); | ||||||
| 480 | push @sstlib,$lnk1->[$ind]->text(); | ||||||
| 481 | print $fh "[FOUND]". $lnk1->[$ind]->text() ."\n\t". $lnk1->[$ind]->url_abs()."\n" if $debug; | ||||||
| 482 | $jnd++ ; | ||||||
| 483 | }; | ||||||
| 484 | }; | ||||||
| 485 | |||||||
| 486 | # verify all collected links one by one to found subtitle filename | ||||||
| 487 | # first we search the image-link which redirect to the download page | ||||||
| 488 | # |
||||||
| 489 | # |
||||||
| 490 | for ( my $ind2=0; $ind2 <= $#sstlist ; $ind2++) | ||||||
| 491 | { | ||||||
| 492 | $mech->get($sstlist[$ind2]); | ||||||
| 493 | # -------------------------------------------------------------------------------------------------------------- | ||||||
| 494 | # 1 : search for indirect links | ||||||
| 495 | # 2 : if not , search for direct links ( DISPO + RIP ) | ||||||
| 496 | # 3 : else : there's nothing to get | ||||||
| 497 | # note : we presuppose there's only one indirect link to download page for a page | ||||||
| 498 | # -------------------------------------------------------------------------------------------------------------- | ||||||
| 499 | my $lnk2=$mech->find_link(text_regex=>qr/(Disponible en section releases)|(v2\.frigorifix\.com\/Themes\/FrigoLand\/images\/post\/xx.gif)/); | ||||||
| 500 | #is there any indirect link to download page ? | ||||||
| 501 | print $fh "[DEBUG] SEARCH SUBTITLES LINKS FOR : [ ".$sstlib[$ind2]." ] ... \n" if ($debug); | ||||||
| 502 | if ($#{$lnk2}>0) | ||||||
| 503 | { | ||||||
| 504 | $mech->get( $lnk2->url() ); | ||||||
| 505 | _search_direct_link($sstlib[$ind2],$sstlist[$ind2]); | ||||||
| 506 | } | ||||||
| 507 | else # there's no release link here? | ||||||
| 508 | { | ||||||
| 509 | _search_direct_link($sstlib[$ind2],$sstlist[$ind2]) | ||||||
| 510 | }; | ||||||
| 511 | }; | ||||||
| 512 | return $nbres; | ||||||
| 513 | }; | ||||||
| 514 | #------------------------------------------------------------------------------------------------------------------------- | ||||||
| 515 | sub _search_direct_link{ | ||||||
| 516 | my ($alt_lib,$topic_link) =shift; | ||||||
| 517 | my $self =attr shift; | ||||||
| 518 | my $knd=0; | ||||||
| 519 | my $lnk3 = $mech->find_all_links(); | ||||||
| 520 | for ( $knd=0; $knd <= $#{$lnk3} ; $knd++) | ||||||
| 521 | { | ||||||
| 522 | if ($lnk3->[$knd]->url()=~ m/action=dlattach;topic/ ) | ||||||
| 523 | { | ||||||
| 524 | print $fh "* [ LINK FOUND ] : \t"; | ||||||
| 525 | print $fh $lnk3->[$knd]->url_abs() ."\n"; | ||||||
| 526 | $mech->get($lnk3->[$knd]->url_abs()); | ||||||
| 527 | my $libelle=$mech->res()->headers()->{'content-disposition'}."\n"; | ||||||
| 528 | $libelle = substr $libelle , 22, (length($libelle)-24); | ||||||
| 529 | print $fh "[DEBUG] extract title : ". $libelle ." \n" if ($debug); | ||||||
| 530 | print $fh "[numero : ".$nbres."]\n"if ($debug); | ||||||
| 531 | $sstsav{$lnk3->[$knd]->url_abs()}=$libelle."_".$alt_lib; | ||||||
| 532 | print $fh "\t[Alt libelle Extracted]\t".$alt_lib."\n" if ($debug); | ||||||
| 533 | print $fh "\t[Libelle Extracted]\t".$libelle."\n" if ($debug); | ||||||
| 534 | print $fh "\t[Url Extracted]\t".$lnk3->[$knd]->url_abs()."\n" if ($debug); | ||||||
| 535 | if ($html) | ||||||
| 536 | { | ||||||
| 537 | print HTML "  [$knd]->url_abs() ."\">".$libelle."_".$alt_lib." \n"; |
||||||
| 538 | }; | ||||||
| 539 | $nbres++; | ||||||
| 540 | }; | ||||||
| 541 | }; | ||||||
| 542 | return $nbres; | ||||||
| 543 | }; | ||||||
| 544 | sub searchdbm{ | ||||||
| 545 | my $self =attr shift; | ||||||
| 546 | croak " can do a local search without $motif !! \n" unless $motif; | ||||||
| 547 | if ($html) | ||||||
| 548 | { | ||||||
| 549 | print HTMLFILE " \n"; |
||||||
| 550 | print HTMLFILE "Searching on local DBM : $dbfile for $site at : ".localtime()." \n"; |
||||||
| 551 | print HTMLFILE " \n"; |
||||||
| 552 | }; | ||||||
| 553 | tie(%sstsav,'DB_File',$dbfile) | ||||||
| 554 | or die "can not access : $dbfile : $!\n"; | ||||||
| 555 | while (my ($k,$v)=each(%sstsav)) | ||||||
| 556 | { | ||||||
| 557 | if ($v =~ m/$motif/i) | ||||||
| 558 | { | ||||||
| 559 | print $fh "[FOUND Libelle ] $v \n[FOUND LINK]".$k ."\n"; | ||||||
| 560 | if ($html) | ||||||
| 561 | { | ||||||
| 562 | print HTMLFILE "".$v." \n"; |
||||||
| 563 | $nbres++; | ||||||
| 564 | }; | ||||||
| 565 | }; | ||||||
| 566 | }; | ||||||
| 567 | untie(%sstsav); | ||||||
| 568 | if ($html) | ||||||
| 569 | { | ||||||
| 570 | print HTMLFILE " ".$nbres." result(s) found \n"; |
||||||
| 571 | print HTMLFILE " html finished at ".localtime()." \n"; |
||||||
| 572 | }; | ||||||
| 573 | }; | ||||||
| 574 | #--------------------------------------------------------------------------- | ||||||
| 575 | #-- save updated hash into dbm file | ||||||
| 576 | #-- internal use only | ||||||
| 577 | #--------------------------------------------------------------------------- | ||||||
| 578 | sub _savedbm{ | ||||||
| 579 | my $self =attr shift; | ||||||
| 580 | my %hashtosave; | ||||||
| 581 | use DB_File; | ||||||
| 582 | tie (%hashtosave,'DB_File',$dbfile ) | ||||||
| 583 | or die "can not use $dbfile : $!\n"; | ||||||
| 584 | while (my ($k, $v) = each %sstsav) | ||||||
| 585 | { $hashtosave{$k}=$v;}; | ||||||
| 586 | untie(%hashtosave); | ||||||
| 587 | return; | ||||||
| 588 | }; | ||||||
| 589 | ## | ||||||
| 590 | sub END{ | ||||||
| 591 | my $self =attr shift; | ||||||
| 592 | close HTML;close FH; | ||||||
| 593 | return; | ||||||
| 594 | }; | ||||||
| 595 | 1; # End of WWW::Sucksub::Frigo |