blib/lib/WWW/Sucksub/Vostfree.pm | |||
---|---|---|---|
Criterion | Covered | Total | % |
statement | 22 | 24 | 91.6 |
branch | n/a | ||
condition | n/a | ||
subroutine | 8 | 8 | 100.0 |
pod | n/a | ||
total | 30 | 32 | 93.7 |
line | stmt | bran | cond | sub | pod | time | code |
---|---|---|---|---|---|---|---|
1 | package WWW::Sucksub::Vostfree; | ||||||
2 | =head1 NAME | ||||||
3 | |||||||
4 | WWW::Sucksub::Vostfree - automated access to vost.free.fr | ||||||
5 | |||||||
6 | =head1 VERSION | ||||||
7 | |||||||
8 | Version 0.05 | ||||||
9 | |||||||
10 | =cut | ||||||
11 | |||||||
12 | our $VERSION = '0.05'; | ||||||
13 | |||||||
14 | =head1 SYNOPSIS | ||||||
15 | |||||||
16 | SuckSub::Vostfree is a web robot based on the WWW::Mechanize Module | ||||||
17 | This module search and collect distant result on the vostfree.fr web database. | ||||||
18 | Subtitles Files urls and associated titles are stored in a dbm file. | ||||||
19 | Distant and local subtitles search are possible. you can use local database thru simple html generated report. | ||||||
20 | |||||||
21 | |||||||
22 | |||||||
23 | use WWW::Sucksub::Vostfree; | ||||||
24 | my $foo = WWW::Sucksub::Vostfree->new( | ||||||
25 | dbfile=> '/where/your/DBM/file is.db', | ||||||
26 | html =>'/where/your/html/report/is.html', | ||||||
27 | motif=> 'the word(s) you search', | ||||||
28 | debug=> 1, | ||||||
29 | logout => '/where/your/debug/info/are/written.log', | ||||||
30 | ); ); | ||||||
31 | $foo->search(); # collect all link corresponding to the $foo->motif() | ||||||
32 | $foo->motif('x'); # modify the search criteria | ||||||
33 | $foo->searchdbm(); # launch a search on the local database | ||||||
34 | |||||||
35 | Html report should be generated at the end of search() and searchdbm(). | ||||||
36 | |||||||
37 | =head1 CONSTRUCTOR AND STARTUP | ||||||
38 | |||||||
39 | =head2 Vostfree Constructor | ||||||
40 | |||||||
41 | The new() constructor, is associated to default values : | ||||||
42 | you can modify these one as shown in the synopsis example. | ||||||
43 | initial values you can modify are these : | ||||||
44 | |||||||
45 | my $foo = WWW::Sucksub::Vostfree->new( | ||||||
46 | html=> "$ENV{HOME}"."/extratitle_report.html", | ||||||
47 | dbfile => "$ENV{HOME}"."/extratitle_db.db", | ||||||
48 | motif=> undef, | ||||||
49 | tmpfile_vost = > "$ENV{HOME}"."/.vostfree_tmp.html", | ||||||
50 | debug=> 0, | ||||||
51 | logout => \*STDOUT | ||||||
52 | useragent=> "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.5) Gecko/20031007" | ||||||
53 | ); | ||||||
54 | |||||||
55 | =head3 new() constructor attributes and associated methods | ||||||
56 | |||||||
57 | Few attributes can be set thru new() attributes. | ||||||
58 | All attributes can be modified by corresponding methods: | ||||||
59 | |||||||
60 | $foo->WWW::Sucksub::Vostfree->new() | ||||||
61 | $foo->useragent() # get the useragent attribute value | ||||||
62 | $foo->useragent('tructruc') # set the useragent attribute value to 'tructruc' | ||||||
63 | |||||||
64 | |||||||
65 | =head4 cookies_file() | ||||||
66 | |||||||
67 | arg must be a file, this default value can be modified by calling the | ||||||
68 | |||||||
69 | $foo->cookies_file('/where/my/cookies/are.txt') | ||||||
70 | |||||||
71 | modify the default value positionned by the new constructor. | ||||||
72 | |||||||
73 | $foo->cookies_file() | ||||||
74 | |||||||
75 | return the actual value of the cookies file path. | ||||||
76 | |||||||
77 | =head4 useragent() | ||||||
78 | |||||||
79 | arg should be a valid useragent. There's no reason to change this default value. | ||||||
80 | |||||||
81 | $foo->useragent() | ||||||
82 | |||||||
83 | return the value of the current useragent. | ||||||
84 | |||||||
85 | =head4 tmpfile_vost() | ||||||
86 | |||||||
87 | Vostfree.pm needs to write temporary file. | ||||||
88 | the tmpfile_vost() method allows you to change path of this temporary file : | ||||||
89 | |||||||
90 | $foo=WWW::Sucksub::Vostfree->new( | ||||||
91 | ... | ||||||
92 | tmp_vostfile => '/where/tmp/file/is/written.html', | ||||||
93 | ... | ||||||
94 | ); | ||||||
95 | |||||||
96 | To retrieve temporary file path : | ||||||
97 | |||||||
98 | my $tmp=$foo->tmpfile_vost(); | ||||||
99 | |||||||
100 | To change temporary file path: | ||||||
101 | |||||||
102 | $foo->tmpfile_vost('/where/my/new/tmp/file/is/written.html'); | ||||||
103 | |||||||
104 | |||||||
105 | =head4 motif() | ||||||
106 | |||||||
107 | you should here give a real value to this function : | ||||||
108 | if $foo->motif stays undef, the package execution will be aborted | ||||||
109 | |||||||
110 | $foo->motif('xxx') | ||||||
111 | |||||||
112 | allows to precise that you're searching a word that contains 'xxx' | ||||||
113 | |||||||
114 | $foo->motif() | ||||||
115 | |||||||
116 | return the current value of the string you search. | ||||||
117 | |||||||
118 | =head4 debug() | ||||||
119 | |||||||
120 | WWW-Sucksub-Vostfree can produce a lot of interresting informations | ||||||
121 | The default value is "0" : that means that any debug informations will be written | ||||||
122 | on the output ( see the logout() method too.) | ||||||
123 | |||||||
124 | $foo->debug(0) # stop the product of debbugging informations | ||||||
125 | $foo->debug(1) # debug info will be written to the log file ( see logout() method) | ||||||
126 | |||||||
127 | =head4 logout() | ||||||
128 | |||||||
129 | if you want some debug informations, you should set the debug attribute to 1 | ||||||
130 | See debug() method. | ||||||
131 | logout() method is associated to the debug() attribute value. | ||||||
132 | It indicates path where debug info will be written. | ||||||
133 | Default value is : | ||||||
134 | |||||||
135 | $foo=WWW::Sucksub::Vostfree->new( | ||||||
136 | ... | ||||||
137 | logout => \*STDOUT, | ||||||
138 | ..., | ||||||
139 | ); | ||||||
140 | |||||||
141 | output and optional debugging info will be produced ont STDOUT | ||||||
142 | or any other descriptor if you give filename as arg, by example : | ||||||
143 | |||||||
144 | $foo=WWW::Sucksub::Vostfree->new( | ||||||
145 | ... | ||||||
146 | logout => '/where/my/log/is/written.txt', | ||||||
147 | ..., | ||||||
148 | ); | ||||||
149 | |||||||
150 | =head4 dbfile() | ||||||
151 | |||||||
152 | define dbm file for store and retrieving extracted informations | ||||||
153 | you must provide a full path to the db file to store results. | ||||||
154 | the search() method can not be used without defined dbm file. | ||||||
155 | |||||||
156 | $foo->dbfile('/where/your/db/is.db') | ||||||
157 | |||||||
158 | The file will should be readable/writable. | ||||||
159 | |||||||
160 | =head4 html() | ||||||
161 | |||||||
162 | Define simple html output where to write search report. | ||||||
163 | you must provide au full path to the html file if you want to get an html output. | ||||||
164 | |||||||
165 | $foo->html('/where/the html/report/is/written.html') | ||||||
166 | |||||||
167 | If $foo->html() is defined. you can get the value of this attribute like this : | ||||||
168 | |||||||
169 | my $html_page = $foo->html() | ||||||
170 | |||||||
171 | html file will be used for report with search and searchdbm() methods. | ||||||
172 | |||||||
173 | |||||||
174 | =head1 METHODS and FUNCTIONS | ||||||
175 | |||||||
176 | these functions use the precedent attributes value. | ||||||
177 | |||||||
178 | =head2 search() | ||||||
179 | |||||||
180 | this function takes no argument. | ||||||
181 | it alows to launch a local dbm search. | ||||||
182 | |||||||
183 | $foo-> search() | ||||||
184 | |||||||
185 | the dbm file is read to give you every couple (title,link) which corresponds to | ||||||
186 | the motif() pattern. | ||||||
187 | |||||||
188 | =head2 searchdbm() | ||||||
189 | |||||||
190 | this function takes no argument. | ||||||
191 | it alows to initiate the distant search on the web site vost.free.fr | ||||||
192 | the local dbm file is automatically written. Results are accumulated to the dbm file | ||||||
193 | you defined. | ||||||
194 | a search pattern must be define thru motif() method before launching search. | ||||||
195 | |||||||
196 | =head2 get_all_result() | ||||||
197 | |||||||
198 | return a hash of every couple ( title, absolute http link of subtitle file ) the search or update method returned. | ||||||
199 | |||||||
200 | my %hash=$foo->get_all_result() | ||||||
201 | |||||||
202 | |||||||
203 | =head1 SEE ALSO | ||||||
204 | |||||||
205 | =over 4 | ||||||
206 | |||||||
207 | =item * L |
||||||
208 | |||||||
209 | =item * L |
||||||
210 | |||||||
211 | =item * L |
||||||
212 | |||||||
213 | =item * L |
||||||
214 | |||||||
215 | =item * L |
||||||
216 | |||||||
217 | =item * L |
||||||
218 | |||||||
219 | =item * L |
||||||
220 | |||||||
221 | =back | ||||||
222 | |||||||
223 | =head1 AUTHOR | ||||||
224 | |||||||
225 | Timothée foucart, C<< |
||||||
226 | |||||||
227 | =head1 BUGS | ||||||
228 | |||||||
229 | Please report any bugs or feature requests to | ||||||
230 | C |
||||||
231 | L |
||||||
232 | I will be notified, and then you'll automatically be notified of progress on | ||||||
233 | your bug as I make changes. | ||||||
234 | |||||||
235 | |||||||
236 | =head1 ACKNOWLEDGEMENTS | ||||||
237 | |||||||
238 | =head1 COPYRIGHT & LICENSE | ||||||
239 | |||||||
240 | Copyright 2005 Timothée foucart, all rights reserved. | ||||||
241 | |||||||
242 | This program is free software; you can redistribute it and/or modify it | ||||||
243 | under the same terms as Perl itself. | ||||||
244 | |||||||
245 | =cut | ||||||
246 | 1 | 1 | 20574 | use vars qw(@ISA @EXPORT $VERSION); | |||
1 | 4 | ||||||
1 | 86 | ||||||
247 | @ISA = qw(Exporter); | ||||||
248 | @EXPORT=qw( debug dbfile tmp_file_vost | ||||||
249 | get_all_result html logout | ||||||
250 | motif search searchdbm useragent ); | ||||||
251 | |||||||
252 | 1 | 1 | 6 | use strict; | |||
1 | 2 | ||||||
1 | 30 | ||||||
253 | 1 | 1 | 5 | use warnings; | |||
1 | 6 | ||||||
1 | 29 | ||||||
254 | 1 | 1 | 4 | use Carp; | |||
1 | 2 | ||||||
1 | 68 | ||||||
255 | 1 | 1 | 912 | use utf8; | |||
1 | 9 | ||||||
1 | 4 | ||||||
256 | 1 | 1 | 800 | use HTTP::Cookies; | |||
1 | 12114 | ||||||
1 | 26 | ||||||
257 | 1 | 1 | 1086 | use WWW::Mechanize; | |||
1 | 165045 | ||||||
1 | 42 | ||||||
258 | 1 | 1 | 411 | use Alias qw(attr); | |||
0 | |||||||
0 | |||||||
259 | use vars qw( $cookies_file $site $nbres | ||||||
260 | $base $dlbase $debug $useragent | ||||||
261 | $motif $dbfile $dbsearch %sstsav | ||||||
262 | $logout $fh $tmpfile_vost $html | ||||||
263 | ); | ||||||
264 | # | ||||||
265 | sub new{ | ||||||
266 | my $vostfree=shift; | ||||||
267 | my $classe= ref($vostfree) || $vostfree; | ||||||
268 | my $self={ }; | ||||||
269 | bless($self,$classe); | ||||||
270 | $self->_init(@_); | ||||||
271 | logout($self->{logout}); | ||||||
272 | |||||||
273 | return $self; | ||||||
274 | }; | ||||||
275 | |||||||
276 | sub _init{ | ||||||
277 | my $self= attr shift; | ||||||
278 | # | ||||||
279 | # -- init default values | ||||||
280 | # | ||||||
281 | $self->{base} = "http://vo.st.fr.free.fr/"; | ||||||
282 | $self->{dlbase} = "http://vo.st.fr.free.fr/sts/"; | ||||||
283 | $self->{site} = "http://vo.st.fr.free.fr/search.php"; | ||||||
284 | $self->{tmpfile_vost} = "$ENV{HOME}"."/.vostfree_tmp.html"; | ||||||
285 | $self->{cookies_file} = "$ENV{HOME}"."/.cookies_sksb"; | ||||||
286 | $self->{useragent} = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.5) Gecko/20031007"; | ||||||
287 | $self->{motif} = undef; | ||||||
288 | $self->{debug} = 0; | ||||||
289 | $self->{logout} = \*STDOUT; | ||||||
290 | $self->{html} = "$ENV{HOME}"."/extratitle_report.html"; | ||||||
291 | $self->{dbfile} = "$ENV{HOME}"."/extratitle_db.db"; | ||||||
292 | $self->{dbsearch} = 0; | ||||||
293 | $self->{sstsav} ={}; | ||||||
294 | $self->{nbres} = 0; | ||||||
295 | # | ||||||
296 | # -- replace forced values | ||||||
297 | # | ||||||
298 | if (@_) | ||||||
299 | { | ||||||
300 | my %param=@_; | ||||||
301 | while (my($x,$y) =each(%param)){$self->{$x}=$y;}; | ||||||
302 | } | ||||||
303 | return $self; | ||||||
304 | } | ||||||
305 | sub useragent { | ||||||
306 | my $self =attr shift; | ||||||
307 | if (@_) {$useragent=shift;} | ||||||
308 | return $useragent; | ||||||
309 | } | ||||||
310 | sub dlbase { #internal | ||||||
311 | my $self =attr shift; | ||||||
312 | if (@_) {$dlbase=shift;} | ||||||
313 | return $dlbase; | ||||||
314 | } | ||||||
315 | sub logout { | ||||||
316 | if (@_){$logout=shift; } | ||||||
317 | if ($logout) | ||||||
318 | { open(FH , ">>", $logout) or die "$logout : $!\n"; | ||||||
319 | $fh=(\*FH);} | ||||||
320 | else | ||||||
321 | { $fh=(\*STDOUT);}; | ||||||
322 | return $logout; | ||||||
323 | } | ||||||
324 | sub debug { | ||||||
325 | my $self =attr shift; | ||||||
326 | if (@_) {$debug=shift;} | ||||||
327 | return $debug; | ||||||
328 | } | ||||||
329 | sub cookies_file { | ||||||
330 | my $self =attr shift; | ||||||
331 | if (@_) {$cookies_file=shift;} | ||||||
332 | return $cookies_file; | ||||||
333 | } | ||||||
334 | sub tmpfile_vost { | ||||||
335 | my $self =attr shift; | ||||||
336 | if (@_) {$tmpfile_vost=shift;} | ||||||
337 | return $tmpfile_vost; | ||||||
338 | } | ||||||
339 | sub sstsav { | ||||||
340 | my $self =attr shift; | ||||||
341 | if (@_) {%sstsav=shift;} | ||||||
342 | return %sstsav; | ||||||
343 | } | ||||||
344 | sub dbfile { | ||||||
345 | my $self =attr shift; | ||||||
346 | if (@_) {$dbfile=shift;}; | ||||||
347 | return $dbfile; | ||||||
348 | } | ||||||
349 | sub dbsearch { | ||||||
350 | my $self =attr shift; | ||||||
351 | if (@_) {$dbsearch=shift;} | ||||||
352 | croak " can not do a local search without motif !!\n you must provide a string thru motif() method before \n" | ||||||
353 | unless $motif; | ||||||
354 | if ($dbsearch>0){searchdbm();}; | ||||||
355 | return $dbfile; | ||||||
356 | } | ||||||
357 | #------------------------------------------------------------------------------------------------------------------------ | ||||||
358 | # alias method for ->sstsav | ||||||
359 | #------------------------------------------------------------------------------------------------------------------------ | ||||||
360 | sub get_all_result { | ||||||
361 | my $self =attr shift; | ||||||
362 | if ($self->sstsav() ==undef){ return undef} | ||||||
363 | else {return %sstsav;}; | ||||||
364 | }; | ||||||
365 | sub html { | ||||||
366 | my $self =attr shift; | ||||||
367 | if (@_) {$html=shift;} | ||||||
368 | else {$html=$self;}; | ||||||
369 | unless (-e ($html)) | ||||||
370 | { | ||||||
371 | print $fh "[DEBUG] html report file doesn't exists \n"; | ||||||
372 | print $fh "[DEBUG] Sucksub will create one ... \n"; | ||||||
373 | } | ||||||
374 | return $html; | ||||||
375 | } | ||||||
376 | sub motif { | ||||||
377 | my $self =attr shift; | ||||||
378 | if (@_) {$motif=shift;}; | ||||||
379 | if ((length($motif))<2) | ||||||
380 | { print $fh "[INFO] motif is : ".$motif." [length = ".length($motif)." ]\n" if $debug; | ||||||
381 | croak "[FATAL WARNING] your search motif should be longer than 2 characters ! \n It's only ".length($motif)." long !\n"; | ||||||
382 | }; | ||||||
383 | return $motif; | ||||||
384 | } | ||||||
385 | sub search{ | ||||||
386 | my $self =attr shift; | ||||||
387 | croak "can not search without motif!\n" unless $motif; | ||||||
388 | motif($motif);#obtain warn eventually | ||||||
389 | our $mech = WWW::Mechanize->new(agent=>$useragent, | ||||||
390 | cookie_jar => HTTP::Cookies->new( | ||||||
391 | file => $cookies_file, | ||||||
392 | autosave => 1, | ||||||
393 | ignore_discard => 0, | ||||||
394 | ), | ||||||
395 | ); | ||||||
396 | $mech->stack_depth(1); | ||||||
397 | if ($html) | ||||||
398 | { | ||||||
399 | open (HTMLFILE,">>",$html) or warn "can not access $html : $! \n"; | ||||||
400 | print HTMLFILE " Generated by suckSub perl module\n"; |
||||||
401 | print HTMLFILE "searching : ".$motif." on ".$site." \n"; |
||||||
402 | print HTMLFILE " ".localtime()." \n"; |
||||||
403 | }; | ||||||
404 | print $fh "--------------------------------------------------------------------------------\n" if ($debug); | ||||||
405 | print $fh "[DEBUG] begin scan on $site at : ".localtime()."\n" if ($debug); | ||||||
406 | print $fh "[DEBUG] searching : ".$motif." on $site \n" if ($debug); | ||||||
407 | print $fh "--------------------------------------------------------------------------------\n" if ($debug); | ||||||
408 | if ($debug) {print $fh "\n[DEBUG \t VO.ST.FREE.FR ][BEGIN SCAN]\n";}; | ||||||
409 | # we're obliged to get the entry page to avoid a redirection (because of no cookies ) | ||||||
410 | $mech->get($base) or die "[WARNING] can not get $base ! : $! \n"; | ||||||
411 | $mech->get($site) or die "[WARNING] can not get $site ! : $! \n"; | ||||||
412 | $mech->form_number(1); | ||||||
413 | $mech->set_fields( search => $motif ); | ||||||
414 | $mech->click(); | ||||||
415 | if ($debug) { print $fh "[DEBUG \t GET URL \t : \t ".$mech->uri()."]\n" if ($debug);}; | ||||||
416 | $nbres = parse_vostfree($mech); | ||||||
417 | if (!$nbres){$nbres=0}; | ||||||
418 | print $fh "[DEBUG \t : $nbres trouves sur $base]\n" if ($debug); | ||||||
419 | print $fh "[END]\n" if ($debug); | ||||||
420 | print $fh "--------------------------------------------------------------------------------\n\n" if ($debug); | ||||||
421 | |||||||
422 | return; | ||||||
423 | }; | ||||||
424 | # | ||||||
425 | #--- this function parses only one result page and return all subtitles url. | ||||||
426 | # | ||||||
427 | sub parse_vostfree{ #INTERNAL | ||||||
428 | my $mech=$_[0]; | ||||||
429 | my $jnd=0; my $jnd2=0; | ||||||
430 | my $lnk=$mech->find_all_links(); | ||||||
431 | my $nbl = $#{$lnk}; my $ind=0; | ||||||
432 | # rechercher les liens des reponses de la rechercher | ||||||
433 | my @sstlist=[];# memo array | ||||||
434 | for ( my $ind=0; $ind <= $#{$lnk} ; $ind++) | ||||||
435 | { | ||||||
436 | # search and memorize the subtitle label | ||||||
437 | #http://vo.st.fr.free.fr/edit.php?id=142 | ||||||
438 | if ($lnk->[$ind]->url() =~ m/edit\.php\?id/ ) | ||||||
439 | { | ||||||
440 | $sstlist[$jnd] = $lnk->[$ind]->url_abs(); | ||||||
441 | my $libelle = $lnk->[$ind]->text(); | ||||||
442 | print $fh "[FOUND PAGE] ". $lnk->[$ind]->url_abs()."\n" if $debug; | ||||||
443 | print $fh "[FOUND TITLE]". $lnk->[$ind]->text() ."\n" if $debug; | ||||||
444 | # get the link file url | ||||||
445 | my $result2 = $mech->get( $sstlist[$jnd] ); | ||||||
446 | print $fh "[DEBUG] GET ". $mech->uri()."\n" if ($debug); | ||||||
447 | # parse the result page : link is in a javascript popup | ||||||
448 | unlink ($tmpfile_vost) unless (!(-f $tmpfile_vost)); | ||||||
449 | open (TAMPON,'>', $tmpfile_vost) or croak "can not open $tmpfile_vost : $! \n"; | ||||||
450 | print TAMPON $mech->response->as_string; | ||||||
451 | close TAMPON; | ||||||
452 | # save one or more subtitles links | ||||||
453 | my @lnkdl=vosftree_dlfile($tmpfile_vost); | ||||||
454 | for (my $lnd=0;$lnd<=$#lnkdl;$lnd++) | ||||||
455 | { | ||||||
456 | $sstsav{$lnkdl[$lnd]} = $libelle; | ||||||
457 | if ($lnd>0){$libelle=$sstsav{$lnkdl[$lnd]}."_(".$lnd.")";}; | ||||||
458 | if ($html) | ||||||
459 | { | ||||||
460 | print HTMLFILE "".$libelle." \n"; |
||||||
461 | }; | ||||||
462 | if ($debug) | ||||||
463 | { | ||||||
464 | print $fh "[FOUND LINK ]".$lnkdl[$lnd]."\n" if $debug; | ||||||
465 | }; | ||||||
466 | |||||||
467 | if ($dbfile) | ||||||
468 | { | ||||||
469 | savedbm(); | ||||||
470 | print $fh "[DBM SAVE] ".$lnkdl[$lnd]."\n" if $debug; | ||||||
471 | }; | ||||||
472 | $nbres++; | ||||||
473 | }; | ||||||
474 | }; | ||||||
475 | $jnd++ ; | ||||||
476 | }; | ||||||
477 | # verify we get any result for the search request | ||||||
478 | if ( $jnd < 1) | ||||||
479 | { | ||||||
480 | print $fh " NO RESULT FOUND for $motif on http://vostfree.fr \n"; | ||||||
481 | if ($html) | ||||||
482 | { | ||||||
483 | print HTMLFILE " NO RESULT FOUND for $motif on http://vostfree.fr \n"; |
||||||
484 | print HTMLFILE " ending scan on $site at : ".localtime()." \n"; |
||||||
485 | }; | ||||||
486 | return; | ||||||
487 | } | ||||||
488 | else | ||||||
489 | { | ||||||
490 | print $fh "[DEBUG] : ". $nbres ." links on distant web search \n" if ($debug); | ||||||
491 | print $fh "[DEBUG] ending scan on $site at : ".localtime()."\n" if ($debug); | ||||||
492 | if ($html) | ||||||
493 | { | ||||||
494 | print HTMLFILE "". $nbres ." result(s) for pattern ".$motif." on http://vostfree.fr \n"; |
||||||
495 | print HTMLFILE " ending scan on $site at : ".localtime()." \n"; |
||||||
496 | }; | ||||||
497 | }; | ||||||
498 | if (-e $tmpfile_vost){unlink $tmpfile_vost}; | ||||||
499 | return $nbres; | ||||||
500 | } | ||||||
501 | sub searchdbm{ | ||||||
502 | my $self =attr shift; | ||||||
503 | croak " can do a local search without $motif !! \n" unless $motif; | ||||||
504 | print HTMLFILE " \n"; |
||||||
505 | print HTMLFILE "Searching on local DBM : $dbfile for $site at : ".localtime()." \n"; |
||||||
506 | print HTMLFILE " \n"; |
||||||
507 | tie(%sstsav,'DB_File',$dbfile) | ||||||
508 | or die "can not access : $dbfile : $!\n"; | ||||||
509 | while (my ($k,$v)=each(%sstsav)) | ||||||
510 | { | ||||||
511 | if ($v =~ m/$motif/i) | ||||||
512 | { | ||||||
513 | print $fh "[FOUND Libelle ] $v \n[FOUND LINK]".$k ."\n"; | ||||||
514 | if ($html) | ||||||
515 | { | ||||||
516 | print HTMLFILE "".$v." \n"; |
||||||
517 | $nbres++; | ||||||
518 | }; | ||||||
519 | }; | ||||||
520 | }; | ||||||
521 | untie(%sstsav); | ||||||
522 | if ($html) | ||||||
523 | { | ||||||
524 | print HTMLFILE " ".$nbres." result(s) found \n"; |
||||||
525 | print HTMLFILE " html finished at ".localtime()." \n"; |
||||||
526 | }; | ||||||
527 | } | ||||||
528 | #------------------------------------------------------------------------------- | ||||||
529 | # parse javascript link and rebuild url links | ||||||
530 | #------------------------------------------------------------------------------- | ||||||
531 | |||||||
532 | sub vosftree_dlfile{ #INTERNAL | ||||||
533 | use HTML::Parser; | ||||||
534 | my $content_vost = $_[0]; | ||||||
535 | our @urlfile=(); | ||||||
536 | my $pf = HTML::Parser->new(); | ||||||
537 | $pf->handler( start => \&start_vostf, "tagname,attr" ); | ||||||
538 | $pf->parse_file($content_vost); | ||||||
539 | $pf->eof; | ||||||
540 | # | ||||||
541 | sub start_vostf{ | ||||||
542 | my ( $tag, $args ) = @_; | ||||||
543 | return unless ($tag eq 'input'); | ||||||
544 | return unless ($args->{value}); | ||||||
545 | return unless ($args->{value} =~ m/T?l?charger/ ); | ||||||
546 | return unless ($args->{onclick}); | ||||||
547 | my $click = $args->{onclick}; | ||||||
548 | my @mot=split(/[']/,$click);my $filename=$mot[1];#'<--fucking eclipse!! | ||||||
549 | $filename=~ s/ /%20/g; $filename=$dlbase.$filename; | ||||||
550 | print $fh "[PARSER FOUND LINK ] ".$filename."\n" if $debug; | ||||||
551 | push @urlfile,$filename; | ||||||
552 | |||||||
553 | }; | ||||||
554 | return @urlfile; | ||||||
555 | }; | ||||||
556 | #--------------------------------------------------------------------------- | ||||||
557 | #-- save updated hash into dbm file | ||||||
558 | #-- internal use only | ||||||
559 | #--------------------------------------------------------------------------- | ||||||
560 | sub savedbm{ | ||||||
561 | my $self =attr shift; | ||||||
562 | my %hashtosave; | ||||||
563 | use DB_File; | ||||||
564 | tie (%hashtosave,'DB_File',$dbfile ) | ||||||
565 | or die "can not use $dbfile : $!\n"; | ||||||
566 | while (my ($k, $v) = each %sstsav) | ||||||
567 | { $hashtosave{$k}=$v;}; | ||||||
568 | untie(%hashtosave); | ||||||
569 | return; | ||||||
570 | }; | ||||||
571 | #-------------------------------------------------------------- | ||||||
572 | # End code | ||||||
573 | #-------------------------------------------------------------- | ||||||
574 | sub END{ | ||||||
575 | close HTMLFILE if $html; | ||||||
576 | }; | ||||||
577 | #end vostfree | ||||||
578 | # | ||||||
579 | 1; |