| blib/lib/WWW/Sucksub/Vostfree.pm | |||
|---|---|---|---|
| Criterion | Covered | Total | % | 
| statement | 22 | 24 | 91.6 | 
| branch | n/a | ||
| condition | n/a | ||
| subroutine | 8 | 8 | 100.0 | 
| pod | n/a | ||
| total | 30 | 32 | 93.7 | 
| line | stmt | bran | cond | sub | pod | time | code | 
|---|---|---|---|---|---|---|---|
| 1 | package WWW::Sucksub::Vostfree; | ||||||
| 2 | =head1 NAME | ||||||
| 3 | |||||||
| 4 | WWW::Sucksub::Vostfree - automated access to vost.free.fr | ||||||
| 5 | |||||||
| 6 | =head1 VERSION | ||||||
| 7 | |||||||
| 8 | Version 0.05 | ||||||
| 9 | |||||||
| 10 | =cut | ||||||
| 11 | |||||||
| 12 | our $VERSION = '0.05'; | ||||||
| 13 | |||||||
| 14 | =head1 SYNOPSIS | ||||||
| 15 | |||||||
| 16 | SuckSub::Vostfree is a web robot based on the WWW::Mechanize Module | ||||||
| 17 | This module search and collect distant result on the vostfree.fr web database. | ||||||
| 18 | Subtitles Files urls and associated titles are stored in a dbm file. | ||||||
| 19 | Distant and local subtitles search are possible. you can use local database thru simple html generated report. | ||||||
| 20 | |||||||
| 21 | |||||||
| 22 | |||||||
| 23 | use WWW::Sucksub::Vostfree; | ||||||
| 24 | my $foo = WWW::Sucksub::Vostfree->new( | ||||||
| 25 | dbfile=> '/where/your/DBM/file is.db', | ||||||
| 26 | html =>'/where/your/html/report/is.html', | ||||||
| 27 | motif=> 'the word(s) you search', | ||||||
| 28 | debug=> 1, | ||||||
| 29 | logout => '/where/your/debug/info/are/written.log', | ||||||
| 30 | ); ); | ||||||
| 31 | $foo->search(); # collect all link corresponding to the $foo->motif() | ||||||
| 32 | $foo->motif('x'); # modify the search criteria | ||||||
| 33 | $foo->searchdbm(); # launch a search on the local database | ||||||
| 34 | |||||||
| 35 | Html report should be generated at the end of search() and searchdbm(). | ||||||
| 36 | |||||||
| 37 | =head1 CONSTRUCTOR AND STARTUP | ||||||
| 38 | |||||||
| 39 | =head2 Vostfree Constructor | ||||||
| 40 | |||||||
| 41 | The new() constructor, is associated to default values : | ||||||
| 42 | you can modify these one as shown in the synopsis example. | ||||||
| 43 | initial values you can modify are these : | ||||||
| 44 | |||||||
| 45 | my $foo = WWW::Sucksub::Vostfree->new( | ||||||
| 46 | html=> "$ENV{HOME}"."/extratitle_report.html", | ||||||
| 47 | dbfile => "$ENV{HOME}"."/extratitle_db.db", | ||||||
| 48 | motif=> undef, | ||||||
| 49 | tmpfile_vost = > "$ENV{HOME}"."/.vostfree_tmp.html", | ||||||
| 50 | debug=> 0, | ||||||
| 51 | logout => \*STDOUT | ||||||
| 52 | useragent=> "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.5) Gecko/20031007" | ||||||
| 53 | ); | ||||||
| 54 | |||||||
| 55 | =head3 new() constructor attributes and associated methods | ||||||
| 56 | |||||||
| 57 | Few attributes can be set thru new() attributes. | ||||||
| 58 | All attributes can be modified by corresponding methods: | ||||||
| 59 | |||||||
| 60 | $foo->WWW::Sucksub::Vostfree->new() | ||||||
| 61 | $foo->useragent() # get the useragent attribute value | ||||||
| 62 | $foo->useragent('tructruc') # set the useragent attribute value to 'tructruc' | ||||||
| 63 | |||||||
| 64 | |||||||
| 65 | =head4 cookies_file() | ||||||
| 66 | |||||||
| 67 | arg must be a file, this default value can be modified by calling the | ||||||
| 68 | |||||||
| 69 | $foo->cookies_file('/where/my/cookies/are.txt') | ||||||
| 70 | |||||||
| 71 | modify the default value positionned by the new constructor. | ||||||
| 72 | |||||||
| 73 | $foo->cookies_file() | ||||||
| 74 | |||||||
| 75 | return the actual value of the cookies file path. | ||||||
| 76 | |||||||
| 77 | =head4 useragent() | ||||||
| 78 | |||||||
| 79 | arg should be a valid useragent. There's no reason to change this default value. | ||||||
| 80 | |||||||
| 81 | $foo->useragent() | ||||||
| 82 | |||||||
| 83 | return the value of the current useragent. | ||||||
| 84 | |||||||
| 85 | =head4 tmpfile_vost() | ||||||
| 86 | |||||||
| 87 | Vostfree.pm needs to write temporary file. | ||||||
| 88 | the tmpfile_vost() method allows you to change path of this temporary file : | ||||||
| 89 | |||||||
| 90 | $foo=WWW::Sucksub::Vostfree->new( | ||||||
| 91 | ... | ||||||
| 92 | tmp_vostfile => '/where/tmp/file/is/written.html', | ||||||
| 93 | ... | ||||||
| 94 | ); | ||||||
| 95 | |||||||
| 96 | To retrieve temporary file path : | ||||||
| 97 | |||||||
| 98 | my $tmp=$foo->tmpfile_vost(); | ||||||
| 99 | |||||||
| 100 | To change temporary file path: | ||||||
| 101 | |||||||
| 102 | $foo->tmpfile_vost('/where/my/new/tmp/file/is/written.html'); | ||||||
| 103 | |||||||
| 104 | |||||||
| 105 | =head4 motif() | ||||||
| 106 | |||||||
| 107 | you should here give a real value to this function : | ||||||
| 108 | if $foo->motif stays undef, the package execution will be aborted | ||||||
| 109 | |||||||
| 110 | $foo->motif('xxx') | ||||||
| 111 | |||||||
| 112 | allows to precise that you're searching a word that contains 'xxx' | ||||||
| 113 | |||||||
| 114 | $foo->motif() | ||||||
| 115 | |||||||
| 116 | return the current value of the string you search. | ||||||
| 117 | |||||||
| 118 | =head4 debug() | ||||||
| 119 | |||||||
| 120 | WWW-Sucksub-Vostfree can produce a lot of interresting informations | ||||||
| 121 | The default value is "0" : that means that any debug informations will be written | ||||||
| 122 | on the output ( see the logout() method too.) | ||||||
| 123 | |||||||
| 124 | $foo->debug(0) # stop the product of debbugging informations | ||||||
| 125 | $foo->debug(1) # debug info will be written to the log file ( see logout() method) | ||||||
| 126 | |||||||
| 127 | =head4 logout() | ||||||
| 128 | |||||||
| 129 | if you want some debug informations, you should set the debug attribute to 1 | ||||||
| 130 | See debug() method. | ||||||
| 131 | logout() method is associated to the debug() attribute value. | ||||||
| 132 | It indicates path where debug info will be written. | ||||||
| 133 | Default value is : | ||||||
| 134 | |||||||
| 135 | $foo=WWW::Sucksub::Vostfree->new( | ||||||
| 136 | ... | ||||||
| 137 | logout => \*STDOUT, | ||||||
| 138 | ..., | ||||||
| 139 | ); | ||||||
| 140 | |||||||
| 141 | output and optional debugging info will be produced ont STDOUT | ||||||
| 142 | or any other descriptor if you give filename as arg, by example : | ||||||
| 143 | |||||||
| 144 | $foo=WWW::Sucksub::Vostfree->new( | ||||||
| 145 | ... | ||||||
| 146 | logout => '/where/my/log/is/written.txt', | ||||||
| 147 | ..., | ||||||
| 148 | ); | ||||||
| 149 | |||||||
| 150 | =head4 dbfile() | ||||||
| 151 | |||||||
| 152 | define dbm file for store and retrieving extracted informations | ||||||
| 153 | you must provide a full path to the db file to store results. | ||||||
| 154 | the search() method can not be used without defined dbm file. | ||||||
| 155 | |||||||
| 156 | $foo->dbfile('/where/your/db/is.db') | ||||||
| 157 | |||||||
| 158 | The file will should be readable/writable. | ||||||
| 159 | |||||||
| 160 | =head4 html() | ||||||
| 161 | |||||||
| 162 | Define simple html output where to write search report. | ||||||
| 163 | you must provide au full path to the html file if you want to get an html output. | ||||||
| 164 | |||||||
| 165 | $foo->html('/where/the html/report/is/written.html') | ||||||
| 166 | |||||||
| 167 | If $foo->html() is defined. you can get the value of this attribute like this : | ||||||
| 168 | |||||||
| 169 | my $html_page = $foo->html() | ||||||
| 170 | |||||||
| 171 | html file will be used for report with search and searchdbm() methods. | ||||||
| 172 | |||||||
| 173 | |||||||
| 174 | =head1 METHODS and FUNCTIONS | ||||||
| 175 | |||||||
| 176 | these functions use the precedent attributes value. | ||||||
| 177 | |||||||
| 178 | =head2 search() | ||||||
| 179 | |||||||
| 180 | this function takes no argument. | ||||||
| 181 | it alows to launch a local dbm search. | ||||||
| 182 | |||||||
| 183 | $foo-> search() | ||||||
| 184 | |||||||
| 185 | the dbm file is read to give you every couple (title,link) which corresponds to | ||||||
| 186 | the motif() pattern. | ||||||
| 187 | |||||||
| 188 | =head2 searchdbm() | ||||||
| 189 | |||||||
| 190 | this function takes no argument. | ||||||
| 191 | it alows to initiate the distant search on the web site vost.free.fr | ||||||
| 192 | the local dbm file is automatically written. Results are accumulated to the dbm file | ||||||
| 193 | you defined. | ||||||
| 194 | a search pattern must be define thru motif() method before launching search. | ||||||
| 195 | |||||||
| 196 | =head2 get_all_result() | ||||||
| 197 | |||||||
| 198 | return a hash of every couple ( title, absolute http link of subtitle file ) the search or update method returned. | ||||||
| 199 | |||||||
| 200 | my %hash=$foo->get_all_result() | ||||||
| 201 | |||||||
| 202 | |||||||
| 203 | =head1 SEE ALSO | ||||||
| 204 | |||||||
| 205 | =over 4 | ||||||
| 206 | |||||||
| 207 |  =item * L | 
||||||
| 208 | |||||||
| 209 |  =item * L | 
||||||
| 210 | |||||||
| 211 |  =item * L | 
||||||
| 212 | |||||||
| 213 |  =item * L | 
||||||
| 214 | |||||||
| 215 |  =item * L | 
||||||
| 216 | |||||||
| 217 |  =item * L | 
||||||
| 218 | |||||||
| 219 |  =item * L | 
||||||
| 220 | |||||||
| 221 | =back | ||||||
| 222 | |||||||
| 223 | =head1 AUTHOR | ||||||
| 224 | |||||||
| 225 |  Timothée foucart, C<<  | 
||||||
| 226 | |||||||
| 227 | =head1 BUGS | ||||||
| 228 | |||||||
| 229 | Please report any bugs or feature requests to | ||||||
| 230 |  C | 
||||||
| 231 |  L | 
||||||
| 232 | I will be notified, and then you'll automatically be notified of progress on | ||||||
| 233 | your bug as I make changes. | ||||||
| 234 | |||||||
| 235 | |||||||
| 236 | =head1 ACKNOWLEDGEMENTS | ||||||
| 237 | |||||||
| 238 | =head1 COPYRIGHT & LICENSE | ||||||
| 239 | |||||||
| 240 | Copyright 2005 Timothée foucart, all rights reserved. | ||||||
| 241 | |||||||
| 242 | This program is free software; you can redistribute it and/or modify it | ||||||
| 243 | under the same terms as Perl itself. | ||||||
| 244 | |||||||
| 245 | =cut | ||||||
| 246 | 1 | 1 | 20574 | use vars qw(@ISA @EXPORT $VERSION); | |||
| 1 | 4 | ||||||
| 1 | 86 | ||||||
| 247 | @ISA = qw(Exporter); | ||||||
| 248 | @EXPORT=qw( debug dbfile tmp_file_vost | ||||||
| 249 | get_all_result html logout | ||||||
| 250 | motif search searchdbm useragent ); | ||||||
| 251 | |||||||
| 252 | 1 | 1 | 6 | use strict; | |||
| 1 | 2 | ||||||
| 1 | 30 | ||||||
| 253 | 1 | 1 | 5 | use warnings; | |||
| 1 | 6 | ||||||
| 1 | 29 | ||||||
| 254 | 1 | 1 | 4 | use Carp; | |||
| 1 | 2 | ||||||
| 1 | 68 | ||||||
| 255 | 1 | 1 | 912 | use utf8; | |||
| 1 | 9 | ||||||
| 1 | 4 | ||||||
| 256 | 1 | 1 | 800 | use HTTP::Cookies; | |||
| 1 | 12114 | ||||||
| 1 | 26 | ||||||
| 257 | 1 | 1 | 1086 | use WWW::Mechanize; | |||
| 1 | 165045 | ||||||
| 1 | 42 | ||||||
| 258 | 1 | 1 | 411 | use Alias qw(attr); | |||
| 0 | |||||||
| 0 | |||||||
| 259 | use vars qw( $cookies_file $site $nbres | ||||||
| 260 | $base $dlbase $debug $useragent | ||||||
| 261 | $motif $dbfile $dbsearch %sstsav | ||||||
| 262 | $logout $fh $tmpfile_vost $html | ||||||
| 263 | ); | ||||||
| 264 | # | ||||||
| 265 | sub new{ | ||||||
| 266 | my $vostfree=shift; | ||||||
| 267 | my $classe= ref($vostfree) || $vostfree; | ||||||
| 268 | my $self={ }; | ||||||
| 269 | bless($self,$classe); | ||||||
| 270 | $self->_init(@_); | ||||||
| 271 | logout($self->{logout}); | ||||||
| 272 | |||||||
| 273 | return $self; | ||||||
| 274 | }; | ||||||
| 275 | |||||||
| 276 | sub _init{ | ||||||
| 277 | my $self= attr shift; | ||||||
| 278 | # | ||||||
| 279 | # -- init default values | ||||||
| 280 | # | ||||||
| 281 | $self->{base} = "http://vo.st.fr.free.fr/"; | ||||||
| 282 | $self->{dlbase} = "http://vo.st.fr.free.fr/sts/"; | ||||||
| 283 | $self->{site} = "http://vo.st.fr.free.fr/search.php"; | ||||||
| 284 | $self->{tmpfile_vost} = "$ENV{HOME}"."/.vostfree_tmp.html"; | ||||||
| 285 | $self->{cookies_file} = "$ENV{HOME}"."/.cookies_sksb"; | ||||||
| 286 | $self->{useragent} = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.5) Gecko/20031007"; | ||||||
| 287 | $self->{motif} = undef; | ||||||
| 288 | $self->{debug} = 0; | ||||||
| 289 | $self->{logout} = \*STDOUT; | ||||||
| 290 | $self->{html} = "$ENV{HOME}"."/extratitle_report.html"; | ||||||
| 291 | $self->{dbfile} = "$ENV{HOME}"."/extratitle_db.db"; | ||||||
| 292 | $self->{dbsearch} = 0; | ||||||
| 293 | $self->{sstsav} ={}; | ||||||
| 294 | $self->{nbres} = 0; | ||||||
| 295 | # | ||||||
| 296 | # -- replace forced values | ||||||
| 297 | # | ||||||
| 298 | if (@_) | ||||||
| 299 | { | ||||||
| 300 | my %param=@_; | ||||||
| 301 | while (my($x,$y) =each(%param)){$self->{$x}=$y;}; | ||||||
| 302 | } | ||||||
| 303 | return $self; | ||||||
| 304 | } | ||||||
| 305 | sub useragent { | ||||||
| 306 | my $self =attr shift; | ||||||
| 307 | if (@_) {$useragent=shift;} | ||||||
| 308 | return $useragent; | ||||||
| 309 | } | ||||||
| 310 | sub dlbase { #internal | ||||||
| 311 | my $self =attr shift; | ||||||
| 312 | if (@_) {$dlbase=shift;} | ||||||
| 313 | return $dlbase; | ||||||
| 314 | } | ||||||
| 315 | sub logout { | ||||||
| 316 | if (@_){$logout=shift; } | ||||||
| 317 | if ($logout) | ||||||
| 318 | { open(FH , ">>", $logout) or die "$logout : $!\n"; | ||||||
| 319 | $fh=(\*FH);} | ||||||
| 320 | else | ||||||
| 321 | { $fh=(\*STDOUT);}; | ||||||
| 322 | return $logout; | ||||||
| 323 | } | ||||||
| 324 | sub debug { | ||||||
| 325 | my $self =attr shift; | ||||||
| 326 | if (@_) {$debug=shift;} | ||||||
| 327 | return $debug; | ||||||
| 328 | } | ||||||
| 329 | sub cookies_file { | ||||||
| 330 | my $self =attr shift; | ||||||
| 331 | if (@_) {$cookies_file=shift;} | ||||||
| 332 | return $cookies_file; | ||||||
| 333 | } | ||||||
| 334 | sub tmpfile_vost { | ||||||
| 335 | my $self =attr shift; | ||||||
| 336 | if (@_) {$tmpfile_vost=shift;} | ||||||
| 337 | return $tmpfile_vost; | ||||||
| 338 | } | ||||||
| 339 | sub sstsav { | ||||||
| 340 | my $self =attr shift; | ||||||
| 341 | if (@_) {%sstsav=shift;} | ||||||
| 342 | return %sstsav; | ||||||
| 343 | } | ||||||
| 344 | sub dbfile { | ||||||
| 345 | my $self =attr shift; | ||||||
| 346 | if (@_) {$dbfile=shift;}; | ||||||
| 347 | return $dbfile; | ||||||
| 348 | } | ||||||
| 349 | sub dbsearch { | ||||||
| 350 | my $self =attr shift; | ||||||
| 351 | if (@_) {$dbsearch=shift;} | ||||||
| 352 | croak " can not do a local search without motif !!\n you must provide a string thru motif() method before \n" | ||||||
| 353 | unless $motif; | ||||||
| 354 | if ($dbsearch>0){searchdbm();}; | ||||||
| 355 | return $dbfile; | ||||||
| 356 | } | ||||||
| 357 | #------------------------------------------------------------------------------------------------------------------------ | ||||||
| 358 | # alias method for ->sstsav | ||||||
| 359 | #------------------------------------------------------------------------------------------------------------------------ | ||||||
| 360 | sub get_all_result { | ||||||
| 361 | my $self =attr shift; | ||||||
| 362 | if ($self->sstsav() ==undef){ return undef} | ||||||
| 363 | else {return %sstsav;}; | ||||||
| 364 | }; | ||||||
| 365 | sub html { | ||||||
| 366 | my $self =attr shift; | ||||||
| 367 | if (@_) {$html=shift;} | ||||||
| 368 | else {$html=$self;}; | ||||||
| 369 | unless (-e ($html)) | ||||||
| 370 | { | ||||||
| 371 | print $fh "[DEBUG] html report file doesn't exists \n"; | ||||||
| 372 | print $fh "[DEBUG] Sucksub will create one ... \n"; | ||||||
| 373 | } | ||||||
| 374 | return $html; | ||||||
| 375 | } | ||||||
| 376 | sub motif { | ||||||
| 377 | my $self =attr shift; | ||||||
| 378 | if (@_) {$motif=shift;}; | ||||||
| 379 | if ((length($motif))<2) | ||||||
| 380 | { print $fh "[INFO] motif is : ".$motif." [length = ".length($motif)." ]\n" if $debug; | ||||||
| 381 | croak "[FATAL WARNING] your search motif should be longer than 2 characters ! \n It's only ".length($motif)." long !\n"; | ||||||
| 382 | }; | ||||||
| 383 | return $motif; | ||||||
| 384 | } | ||||||
| 385 | sub search{ | ||||||
| 386 | my $self =attr shift; | ||||||
| 387 | croak "can not search without motif!\n" unless $motif; | ||||||
| 388 | motif($motif);#obtain warn eventually | ||||||
| 389 | our $mech = WWW::Mechanize->new(agent=>$useragent, | ||||||
| 390 | cookie_jar => HTTP::Cookies->new( | ||||||
| 391 | file => $cookies_file, | ||||||
| 392 | autosave => 1, | ||||||
| 393 | ignore_discard => 0, | ||||||
| 394 | ), | ||||||
| 395 | ); | ||||||
| 396 | $mech->stack_depth(1); | ||||||
| 397 | if ($html) | ||||||
| 398 | { | ||||||
| 399 | open (HTMLFILE,">>",$html) or warn "can not access $html : $! \n"; | ||||||
| 400 |  		print HTMLFILE " Generated by suckSub perl module\n";  | 
||||||
| 401 |  		print HTMLFILE "searching : ".$motif." on ".$site." \n";  | 
||||||
| 402 |  		print HTMLFILE " ".localtime()." \n";  | 
||||||
| 403 | }; | ||||||
| 404 | print $fh "--------------------------------------------------------------------------------\n" if ($debug); | ||||||
| 405 | print $fh "[DEBUG] begin scan on $site at : ".localtime()."\n" if ($debug); | ||||||
| 406 | print $fh "[DEBUG] searching : ".$motif." on $site \n" if ($debug); | ||||||
| 407 | print $fh "--------------------------------------------------------------------------------\n" if ($debug); | ||||||
| 408 | if ($debug) {print $fh "\n[DEBUG \t VO.ST.FREE.FR ][BEGIN SCAN]\n";}; | ||||||
| 409 | # we're obliged to get the entry page to avoid a redirection (because of no cookies ) | ||||||
| 410 | $mech->get($base) or die "[WARNING] can not get $base ! : $! \n"; | ||||||
| 411 | $mech->get($site) or die "[WARNING] can not get $site ! : $! \n"; | ||||||
| 412 | $mech->form_number(1); | ||||||
| 413 | $mech->set_fields( search => $motif ); | ||||||
| 414 | $mech->click(); | ||||||
| 415 | if ($debug) { print $fh "[DEBUG \t GET URL \t : \t ".$mech->uri()."]\n" if ($debug);}; | ||||||
| 416 | $nbres = parse_vostfree($mech); | ||||||
| 417 | if (!$nbres){$nbres=0}; | ||||||
| 418 | print $fh "[DEBUG \t : $nbres trouves sur $base]\n" if ($debug); | ||||||
| 419 | print $fh "[END]\n" if ($debug); | ||||||
| 420 | print $fh "--------------------------------------------------------------------------------\n\n" if ($debug); | ||||||
| 421 | |||||||
| 422 | return; | ||||||
| 423 | }; | ||||||
| 424 | # | ||||||
| 425 | #--- this function parses only one result page and return all subtitles url. | ||||||
| 426 | # | ||||||
| 427 | sub parse_vostfree{ #INTERNAL | ||||||
| 428 | my $mech=$_[0]; | ||||||
| 429 | my $jnd=0; my $jnd2=0; | ||||||
| 430 | my $lnk=$mech->find_all_links(); | ||||||
| 431 | my $nbl = $#{$lnk}; my $ind=0; | ||||||
| 432 | # rechercher les liens des reponses de la rechercher | ||||||
| 433 | my @sstlist=[];# memo array | ||||||
| 434 | for ( my $ind=0; $ind <= $#{$lnk} ; $ind++) | ||||||
| 435 | { | ||||||
| 436 | # search and memorize the subtitle label | ||||||
| 437 | #http://vo.st.fr.free.fr/edit.php?id=142 | ||||||
| 438 | if ($lnk->[$ind]->url() =~ m/edit\.php\?id/ ) | ||||||
| 439 | { | ||||||
| 440 | $sstlist[$jnd] = $lnk->[$ind]->url_abs(); | ||||||
| 441 | my $libelle = $lnk->[$ind]->text(); | ||||||
| 442 | print $fh "[FOUND PAGE] ". $lnk->[$ind]->url_abs()."\n" if $debug; | ||||||
| 443 | print $fh "[FOUND TITLE]". $lnk->[$ind]->text() ."\n" if $debug; | ||||||
| 444 | # get the link file url | ||||||
| 445 | my $result2 = $mech->get( $sstlist[$jnd] ); | ||||||
| 446 | print $fh "[DEBUG] GET ". $mech->uri()."\n" if ($debug); | ||||||
| 447 | # parse the result page : link is in a javascript popup | ||||||
| 448 | unlink ($tmpfile_vost) unless (!(-f $tmpfile_vost)); | ||||||
| 449 | open (TAMPON,'>', $tmpfile_vost) or croak "can not open $tmpfile_vost : $! \n"; | ||||||
| 450 | print TAMPON $mech->response->as_string; | ||||||
| 451 | close TAMPON; | ||||||
| 452 | # save one or more subtitles links | ||||||
| 453 | my @lnkdl=vosftree_dlfile($tmpfile_vost); | ||||||
| 454 | for (my $lnd=0;$lnd<=$#lnkdl;$lnd++) | ||||||
| 455 | { | ||||||
| 456 | $sstsav{$lnkdl[$lnd]} = $libelle; | ||||||
| 457 | if ($lnd>0){$libelle=$sstsav{$lnkdl[$lnd]}."_(".$lnd.")";}; | ||||||
| 458 | if ($html) | ||||||
| 459 | { | ||||||
| 460 |  						print HTMLFILE  "".$libelle." \n";  | 
||||||
| 461 | }; | ||||||
| 462 | if ($debug) | ||||||
| 463 | { | ||||||
| 464 | print $fh "[FOUND LINK ]".$lnkdl[$lnd]."\n" if $debug; | ||||||
| 465 | }; | ||||||
| 466 | |||||||
| 467 | if ($dbfile) | ||||||
| 468 | { | ||||||
| 469 | savedbm(); | ||||||
| 470 | print $fh "[DBM SAVE] ".$lnkdl[$lnd]."\n" if $debug; | ||||||
| 471 | }; | ||||||
| 472 | $nbres++; | ||||||
| 473 | }; | ||||||
| 474 | }; | ||||||
| 475 | $jnd++ ; | ||||||
| 476 | }; | ||||||
| 477 | # verify we get any result for the search request | ||||||
| 478 | if ( $jnd < 1) | ||||||
| 479 | { | ||||||
| 480 | print $fh " NO RESULT FOUND for $motif on http://vostfree.fr \n"; | ||||||
| 481 | if ($html) | ||||||
| 482 | { | ||||||
| 483 |  			print HTMLFILE  " NO RESULT FOUND for  $motif on http://vostfree.fr  \n";  | 
||||||
| 484 |  			print HTMLFILE  " ending scan on $site  at : ".localtime()." \n";  | 
||||||
| 485 | }; | ||||||
| 486 | return; | ||||||
| 487 | } | ||||||
| 488 | else | ||||||
| 489 | { | ||||||
| 490 | print $fh "[DEBUG] : ". $nbres ." links on distant web search \n" if ($debug); | ||||||
| 491 | print $fh "[DEBUG] ending scan on $site at : ".localtime()."\n" if ($debug); | ||||||
| 492 | if ($html) | ||||||
| 493 | { | ||||||
| 494 |  			print HTMLFILE  "". $nbres ." result(s) for pattern ".$motif." on http://vostfree.fr  \n";  | 
||||||
| 495 |  			print HTMLFILE  " ending scan on $site  at : ".localtime()." \n";  | 
||||||
| 496 | }; | ||||||
| 497 | }; | ||||||
| 498 | if (-e $tmpfile_vost){unlink $tmpfile_vost}; | ||||||
| 499 | return $nbres; | ||||||
| 500 | } | ||||||
| 501 | sub searchdbm{ | ||||||
| 502 | my $self =attr shift; | ||||||
| 503 | croak " can do a local search without $motif !! \n" unless $motif; | ||||||
| 504 |  	print HTMLFILE " \n";  | 
||||||
| 505 |  	print HTMLFILE "Searching on local DBM : $dbfile for $site  at : ".localtime()." \n";  | 
||||||
| 506 |  	print HTMLFILE " \n";  | 
||||||
| 507 | tie(%sstsav,'DB_File',$dbfile) | ||||||
| 508 | or die "can not access : $dbfile : $!\n"; | ||||||
| 509 | while (my ($k,$v)=each(%sstsav)) | ||||||
| 510 | { | ||||||
| 511 | if ($v =~ m/$motif/i) | ||||||
| 512 | { | ||||||
| 513 | print $fh "[FOUND Libelle ] $v \n[FOUND LINK]".$k ."\n"; | ||||||
| 514 | if ($html) | ||||||
| 515 | { | ||||||
| 516 |  	  			print HTMLFILE "".$v." \n";  | 
||||||
| 517 | $nbres++; | ||||||
| 518 | }; | ||||||
| 519 | }; | ||||||
| 520 | }; | ||||||
| 521 | untie(%sstsav); | ||||||
| 522 | if ($html) | ||||||
| 523 | { | ||||||
| 524 |  		print HTMLFILE " ".$nbres." result(s) found \n";  | 
||||||
| 525 |  		print HTMLFILE " html finished at ".localtime()." \n";  | 
||||||
| 526 | }; | ||||||
| 527 | } | ||||||
| 528 | #------------------------------------------------------------------------------- | ||||||
| 529 | # parse javascript link and rebuild url links | ||||||
| 530 | #------------------------------------------------------------------------------- | ||||||
| 531 | |||||||
| 532 | sub vosftree_dlfile{ #INTERNAL | ||||||
| 533 | use HTML::Parser; | ||||||
| 534 | my $content_vost = $_[0]; | ||||||
| 535 | our @urlfile=(); | ||||||
| 536 | my $pf = HTML::Parser->new(); | ||||||
| 537 | $pf->handler( start => \&start_vostf, "tagname,attr" ); | ||||||
| 538 | $pf->parse_file($content_vost); | ||||||
| 539 | $pf->eof; | ||||||
| 540 | # | ||||||
| 541 | sub start_vostf{ | ||||||
| 542 | my ( $tag, $args ) = @_; | ||||||
| 543 | return unless ($tag eq 'input'); | ||||||
| 544 | return unless ($args->{value}); | ||||||
| 545 | return unless ($args->{value} =~ m/T?l?charger/ ); | ||||||
| 546 | return unless ($args->{onclick}); | ||||||
| 547 | my $click = $args->{onclick}; | ||||||
| 548 | my @mot=split(/[']/,$click);my $filename=$mot[1];#'<--fucking eclipse!! | ||||||
| 549 | $filename=~ s/ /%20/g; $filename=$dlbase.$filename; | ||||||
| 550 | print $fh "[PARSER FOUND LINK ] ".$filename."\n" if $debug; | ||||||
| 551 | push @urlfile,$filename; | ||||||
| 552 | |||||||
| 553 | }; | ||||||
| 554 | return @urlfile; | ||||||
| 555 | }; | ||||||
| 556 | #--------------------------------------------------------------------------- | ||||||
| 557 | #-- save updated hash into dbm file | ||||||
| 558 | #-- internal use only | ||||||
| 559 | #--------------------------------------------------------------------------- | ||||||
| 560 | sub savedbm{ | ||||||
| 561 | my $self =attr shift; | ||||||
| 562 | my %hashtosave; | ||||||
| 563 | use DB_File; | ||||||
| 564 | tie (%hashtosave,'DB_File',$dbfile ) | ||||||
| 565 | or die "can not use $dbfile : $!\n"; | ||||||
| 566 | while (my ($k, $v) = each %sstsav) | ||||||
| 567 | { $hashtosave{$k}=$v;}; | ||||||
| 568 | untie(%hashtosave); | ||||||
| 569 | return; | ||||||
| 570 | }; | ||||||
| 571 | #-------------------------------------------------------------- | ||||||
| 572 | # End code | ||||||
| 573 | #-------------------------------------------------------------- | ||||||
| 574 | sub END{ | ||||||
| 575 | close HTMLFILE if $html; | ||||||
| 576 | }; | ||||||
| 577 | #end vostfree | ||||||
| 578 | # | ||||||
| 579 | 1; |