File Coverage

blib/lib/HTML/Robot/Scrapper.pm
Criterion Covered Total %
statement 1 3 33.3
branch n/a
condition n/a
subroutine 1 1 100.0
pod n/a
total 2 4 50.0


line stmt bran cond sub pod time code
1             package HTML::Robot::Scrapper;
2 1     1   45591 use Moose;
  0            
  0            
3             #use Class::Load ':all';
4             use Data::Dumper;
5             use Data::Printer;
6             use Try::Tiny;
7             use HTML::Robot::Scrapper::Benchmark::Default;
8             use HTML::Robot::Scrapper::Log::Default;
9             use HTML::Robot::Scrapper::Parser::Default;
10             use HTML::Robot::Scrapper::Queue::Default;
11             use HTML::Robot::Scrapper::UserAgent::Default;
12             use HTML::Robot::Scrapper::Encoding::Default;
13              
14             our $VERSION = '0.11';
15              
16             =head1 ATTRIBUTES
17              
18             =cut
19              
20             =head2 reader
21              
22             this attribute accesses your reader class instance
23              
24             =cut
25             has reader => (
26             is => 'rw',
27             # default => sub {
28             #
29             # },
30             );
31              
32             =head2 writer
33              
34             this attribute accesses your writer class instance
35              
36             =cut
37             has writer => (
38             is => 'rw',
39             # default => sub {
40             # },
41             );
42              
43             =head2 benchmark
44              
45             not ready, i want a catalyst type of method tree like debug for each method for each request
46              
47             =cut
48             has benchmark => (
49             is => 'rw',
50             default => sub {
51             HTML::Robot::Scrapper::Benchmark::Default->new();
52             },
53             );
54             =head2 cache
55              
56             the cache works, with CHI however its only useful right now for GET requests for specific urls.
57             using the cache you will not need to download the page each time, so its good for dev
58              
59             =cut
60             has cache => (
61             is => 'rw',
62             # default => sub {
63             # HTML::Robot::Scrapper::Cache::Default->new();
64             # },
65             );
66             =head2 log
67              
68             the log is not ready yet however it will be log4perl
69              
70             =cut
71             has log => (
72             is => 'rw',
73             default => sub {
74             HTML::Robot::Scrapper::Log::Default->new();
75             },
76             );
77             =head2 parser
78              
79             The default parser reads content types:
80              
81             - text/html with HTML::TreeBuilder::XPath
82              
83             which is in file: lib/HTML/Robot/Scrapper/Parser/HTML/TreeBuilder/XPath.pm
84              
85             - text/xml with XML::XPath
86              
87             which is in file: lib/HTML/Robot/Scrapper/Parser/XML/XPath.pm
88              
89             and the parser is:
90              
91             -base: lib/HTML/Robot/Scrapper/Parser/Base.pm
92              
93             override with:
94              
95             my $robot = HTML::Robot::Scrapper->new (
96             ....
97             log => {
98             base_class => 'HTML::Robot::Scrapper::Log::Base', #optional, your custom base class
99             class => 'Default' #or HTML::Robot::Scrapper::Log::Default
100             },
101             ...
102             )
103              
104             -default: lib/HTML/Robot/Scrapper/Parser/Default.pm
105              
106             =cut
107             has parser => (
108             is => 'rw',
109             default => sub {
110             HTML::Robot::Scrapper::Parser::Default->new();
111             },
112             );
113             =head2 queue
114              
115             base_class: lib/HTML/Robot/Scrapper/Queue/Base.pm
116              
117             default class: lib/HTML/Robot/Scrapper/Queue/Default.pm (Simple Instance Array)
118              
119             you can override the whole thing using a custom base_class, or simply use
120              
121             a different class
122              
123             my $robot = HTML::Robot::Scrapper->new (
124             ....
125             queue => {
126             base_class => 'HTML::Robot::Scrapper::Queue::Base',
127             class => 'HTML::Robot::Scrapper::Queue::Default'
128             },
129             ...
130             )
131              
132             =cut
133             has queue => (
134             is => 'rw',
135             default => sub {
136             HTML::Robot::Scrapper::Queue::Default->new();
137             },
138             );
139              
140             =head2 useragent
141             =cut
142             has useragent => (
143             is => 'rw',
144             default => sub {
145             HTML::Robot::Scrapper::UserAgent::Default->new();
146             },
147             );
148              
149             =head2 encoding
150             =cut
151             has encoding => (
152             is => 'rw',
153             default => sub {
154             HTML::Robot::Scrapper::Encoding::Default->new();
155             },
156             );
157              
158              
159             has custom_attrs => (
160             is => 'rw',
161             default => sub {
162             return [qw/benchmark cache log parser queue useragent encoding/];
163             }
164             );
165              
166             =head2 new
167              
168             my $robot = HTML::Robot::Scrapper->new (
169             reader => HTML::Robot::Scrapper::Reader::TestReader->new,
170             writer => HTML::Robot::Scrapper::Writer::TestWriter->new,
171             # cache => CHI->new(
172             # driver => 'BerkeleyDB',
173             # root_dir => dir( getcwd() , "cache" ),
174             # ),
175             # log => HTML::Robot::Scrapper::Log::Default->new(),
176             # parser => HTML::Robot::Scrapper::Parser::Default->new(),
177             # queue => HTML::Robot::Scrapper::Queue::Default->new(),
178             # useragent => HTML::Robot::Scrapper::UserAgent::Default->new(),
179             # encoding => HTML::Robot::Scrapper::Encoding::Default->new(),
180             );
181              
182             =cut
183              
184              
185              
186             =head2 before 'start'
187              
188             - give access to this class inside other custom classes
189              
190             =cut
191              
192             before 'start' => sub {
193             my ( $self ) = @_;
194             foreach my $attr ( @{ $self->custom_attrs } ) {
195             #give access to this class inside other classes
196             $self->$attr->robot( $self ) if defined $self->$attr and $self->$attr->can( "robot" );
197             }
198             $self->reader->robot( $self );
199             };
200              
201             sub start {
202             my ( $self ) = @_;
203             $self->reader->on_start( $self );
204             my $counter = 0;
205             while ( my $item = $self->queue->queue_get_item ) {
206             $self->benchmark->method_start('finish_in');
207              
208             print '--[ '.$counter++.' ]------------------------------------------------------------------------------'."\n";
209             print ' url: '. $item->{ url }."\n" if exists $item->{ url };
210             my $method = $item->{ method };
211             my $res = $self->useragent->visit($item);
212              
213             #clean up&set passed_key_values
214             $self->reader->passed_key_values( {} );
215             $self->reader->passed_key_values( $item->{passed_key_values} )
216             if exists $item->{passed_key_values};
217              
218             #clean up&set passed_key_values
219             $self->reader->headers( {} );
220             $self->reader->headers( $res->{headers} )
221             if exists $res->{headers};
222              
223             #TODO: set the cookies in $self->reader->cookies
224             # that way its possible to use and update 1 same cookie
225              
226            
227             $self->benchmark->method_start( $method );
228             try {
229             $self->reader->$method( );
230             } catch {
231             warn "ERROR on reader->$method: $_";
232             };
233             $self->benchmark->method_finish( $method );
234              
235             $self->benchmark->method_finish('finish_in', 'Total: ' );
236             }
237             $self->reader->on_finish( );
238             }
239              
240             =head1 NAME
241              
242             HTML::Robot::Scrapper - Your robot to parse webpages
243              
244             =head1 SYNOPSIS
245              
246             See a working example under the module: WWW::Tabela::Fipe ( search on github ).
247              
248             The class
249              
250             HTML::Robot::Scrapper::Parser::Default
251              
252             handles only text/html and text/xml by default
253              
254             So i need to add an extra option for text/plain and tell it to use
255              
256             the same method that already parses text/html, here is an example:
257              
258             * im using the code from the original as base class for this:
259              
260             HTML::Robot::Scrapper::Parser::Default
261              
262             Here i will redefine that class and tell my $robot to favor it
263              
264             ...
265             parser => WWW::Tabela::Fipe::Parser->new,
266             ...
267              
268             See below:
269              
270             package WWW::Tabela::Fipe::Parser;
271             use Moo;
272              
273             has [qw/engine robot/] => ( is => 'rw' );
274              
275             with('HTML::Robot::Scrapper::Parser::HTML::TreeBuilder::XPath');
276             with('HTML::Robot::Scrapper::Parser::XML::XPath');
277              
278             sub content_types {
279             my ( $self ) = @_;
280             return {
281             'text/html' => [
282             {
283             parse_method => 'parse_xpath',
284             description => q{
285             The method above 'parse_xpath' is inside class:
286             HTML::Robot::Scrapper::Parser::HTML::TreeBuilder::XPath
287             },
288             }
289             ],
290             'text/plain' => [
291             {
292             parse_method => 'parse_xpath',
293             description => q{
294             esse site da fipe responde em text/plain e eu preciso parsear esse content type.
295             por isso criei esta classe e passei ela como parametro, sobreescrevendo a classe
296             HTML::Robot::Scrapper::Parser::Default
297             },
298             }
299             ],
300             'text/xml' => [
301             {
302             parse_method => 'parse_xml'
303             },
304             ],
305             };
306             }
307              
308             1;
309              
310             package FIPE;
311              
312             use HTML::Robot::Scrapper;
313             #use CHI;
314             use HTTP::Tiny;
315             use HTTP::CookieJar;
316             use WWW::Tabela::Fipe;
317             use WWW::Tabela::FipeWrite;
318             #use WWW::Tabela::Fipe::Parser;
319             use HTML::Robot::Scrapper::UserAgent::Default;
320              
321             my $robot = HTML::Robot::Scrapper->new(
322             reader => WWW::Tabela::Fipe->new,
323             writer => WWW::Tabela::FipeWrite->new,
324             # cache =>
325             # CHI->new(
326             # driver => 'BerkeleyDB',
327             # root_dir => "/home/catalyst/WWW-Tabela-Fipe/cache/",
328             # ),
329             parser => WWW::Tabela::Fipe::Parser->new, #custom para tb fipe. pois eles respondem com Content type text/plain
330             useragent => HTML::Robot::Scrapper::UserAgent::Default->new(
331             ua => HTTP::Tiny->new(
332             cookie_jar => HTTP::CookieJar->new,
333             agent => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:24.0) Gecko/20100101 Firefox/24.0'
334             ),
335              
336             )
337             );
338              
339             $robot->start();
340              
341             =head1 DESCRIPTION
342              
343             This crawler has been created to be extensible. Scalable with a Redis queue.
344              
345             The main idea is: i need a queue of urls to be crawled, it can be an array living during
346              
347             my instance (not scalable)... or it can be a Redis queue ( scalable ), being accessed by
348              
349             many HTML::Robot::Scrapper instances.
350              
351             Each request inserted into the queue is supposed to be independent. So this thing can scale. I mean,
352              
353             Suppose i need to create an object using stuff from page1, page2 and page3... that will be 3 requests
354              
355             so, the first request will access page1 and collect data into $colleted_data, then, i will append another
356              
357             request for page2 with $collected_data from page 1. So the request for page2 will collect some more data
358              
359             and merge with $collected_data from page1 generating $collected_data_from_page1_and_page2, and then i will insert
360              
361             a new request into my queue for page3 that will collect data and merge with $collected_data_from_page1_and_page2
362              
363             and create the final object: $collected_data_complete.
364              
365              
366             Basically, you need to create a
367              
368             - reader: to read/parse your webpages and
369              
370             and a
371            
372             - writer: to save data your reader collected.
373              
374             You 'might' need to add other content types also, creating your custom class based on:
375              
376             HTML::Robot::Scrapper::Parser::Default
377              
378             See it and you will understand.. by default it handles:
379            
380             - text/html
381             - text/xml
382              
383             =head1 READER ( you create this )
384              
385             Reader: Its where the parsing logic for a specific site lives.
386              
387             You customize the reader telling it where the nodes are, etc..
388              
389             The reader class is where you create your parser.
390              
391             =head2 WRITER ( you create this )
392              
393             Writer: Its the class that will save the data the reader collects.
394              
395             ie: You can create a method "save" that receives an object and simply writes into your DB.
396              
397             Or you can make it write into DB + elastic search .. etc.. whatever you want
398              
399              
400             =head1 CONTENT TYPES AND PARSING METHODS ( you might need to extend this )
401              
402             For example, after making a request call ( HTML::Robot::Scrapper::UserAgent::Default )
403              
404             it will need to parse data.. and will use the response content type to parse that data
405              
406             by default the class that handles that is:
407              
408             package HTML::Robot::Scrapper::Parser::Default;
409             use Moose;
410              
411             has [qw/engine robot/] => ( is => 'rw' );
412              
413             with('HTML::Robot::Scrapper::Parser::HTML::TreeBuilder::XPath'); #gives parse_xpath
414             with('HTML::Robot::Scrapper::Parser::XML::XPath'); #gives parse_xml
415              
416             sub content_types {
417             my ( $self ) = @_;
418             return {
419             'text/html' => [
420             {
421             parse_method => 'parse_xpath',
422             description => q{
423             The method above 'parse_xpath' is inside class:
424             HTML::Robot::Scrapper::Parser::HTML::TreeBuilder::XPath
425             },
426             }
427             ],
428             'text/xml' => [
429             {
430             parse_method => 'parse_xml'
431             },
432             ],
433             };
434             }
435              
436             1;
437              
438             WWW::Tabela::FIPE has a custom Parser class and you can see it as an example.
439              
440             If you need to download images, you will need to create a custom parser class adding 'image/png' as content type for example.
441              
442             =head1 QUEUE OF REQUESTS
443              
444             Another example is the Queue system, it has an api: HTML::Robot::Scrapper::Queue::Base and by default
445              
446             uses: HTML::Robot::Scrapper::Queue::Array which works fine for 1 local instance. However, lets say i want a REDIS queue, so i could
447              
448             implement HTML::Robot::Scrapper::Queue::Redis and make the crawler access a remote queue.. this way i can share a queue between many crawlers independently.
449              
450             Just so you guys know, i have a redis module almost ready, it needs a little refactoring because it comes from another personal project. It will be released asap when i get time.
451              
452             So, if that does not fit you, or you want something else to handle those content types, just create a new class and pass it on to the HTML::Robot::Scrapper constructor. ie:
453              
454             see the SYNOPSIS
455              
456             By default it uses HTTP Tiny and useragent related stuff is in:
457              
458             HTML::Robot::Scrapper::UserAgent::Default
459              
460             =head1 Project Status
461              
462             The crawling works as expected, and works great. And the API will probably not change.
463              
464             Ideas are welcome! You are welcome to contribute.
465              
466             =head1 TODO
467              
468             Implement the REDIS Queue to give as option for the Array queue. Array queue runs local/per instance.. and the redis queue can be shared and accessed by multiple machines!
469              
470             Still need to implement the Log, proper Benchmark with subroutine tree and timing.
471              
472             Allow parameters to be passed in to UserAgent (HTTP::Tiny on this case)
473              
474             Better tests and docs.
475              
476             =head1 Example 1 - Append some urls and extract some data
477              
478             On this first example, it shows how to make a simple crawler... by simple i mean simple GET requests following urls... and grabbing some data.
479              
480             package HTML::Robot::Scrapper::Reader::TestReader;
481             use Moose;
482             with 'HTML::Robot::Scrapper::Reader';
483             use Data::Printer;
484             use Digest::SHA qw(sha1_hex);
485              
486             ## The commented stuff is useful as example
487              
488             has startpage => (
489             is => 'rw',
490             default => sub { return 'http://www.bbc.co.uk/'} ,
491             );
492              
493             has array_of_data => ( is => 'rw', default => sub { return []; } );
494              
495             has counter => ( is => 'rw', default => sub { return 0; } );
496              
497             sub on_start {
498             my ( $self ) = @_;
499             $self->append( search => $self->startpage );
500             $self->append( search => 'http://www.zap.com.br/' );
501             $self->append( search => 'http://www.uol.com.br/' );
502             $self->append( search => 'http://www.google.com/' );
503             }
504              
505             sub search {
506             my ( $self ) = @_;
507             my $title = $self->robot->parser->tree->findvalue( '//title' );
508             my $h1 = $self->robot->parser->tree->findvalue( '//h1' );
509             warn $title;
510             warn p $self->robot->useragent->url ;
511             push( @{ $self->array_of_data } ,
512             { title => $title, url => $self->robot->useragent->url, h1 => $h1 }
513             );
514             }
515              
516             sub on_link {
517             my ( $self, $url ) = @_;
518             return if $self->counter( $self->counter + 1 ) > 3;
519             if ( $url =~ m{^http://www.bbc.co.uk}ig ) {
520             $self->prepend( search => $url ); # prepend url to front of the queue
521             }
522             }
523              
524              
525             sub detail {
526             my ( $self ) = @_;
527             }
528              
529             sub on_finish {
530             my ( $self ) = @_;
531             $self->robot->writer->save_data( $self->array_of_data );
532             }
533              
534             1;
535              
536             =head1 Example 2 - Tabela FIPE ( append custom request calls )
537              
538             See the working version at: https://github.com/hernan604/WWW-Tabela-Fipe
539              
540             This example show an asp website that has those '__EVENTVALIDATION' and '__VIEWSTATE' which must be sent back again on each request... here is the example of such crawler for such website...
541              
542             This example also demonstrates how one could easily login into a website and crawl it also.
543              
544             package WWW::Tabela::Fipe;
545             use Moose;
546             with 'HTML::Robot::Scrapper::Reader';
547             use Data::Printer;
548             use utf8;
549             use HTML::Entities;
550             use HTTP::Request::Common qw(POST);
551              
552             has [ qw/marcas viewstate eventvalidation/ ] => ( is => 'rw' );
553              
554             has veiculos => ( is => 'rw' , default => sub { return []; });
555             has referer => ( is => 'rw' );
556              
557             sub start {
558             my ( $self ) = @_;
559             }
560              
561             has startpage => (
562             is => 'rw',
563             default => sub {
564             return [
565             {
566             tipo => 'moto',
567             url => 'http://www.fipe.org.br/web/indices/veiculos/default.aspx?azxp=1&v=m&p=52'
568             },
569             {
570             tipo => 'carro',
571             url => 'http://www.fipe.org.br/web/indices/veiculos/default.aspx?p=51'
572             },
573             {
574             tipo => 'caminhao',
575             url => 'http://www.fipe.org.br/web/indices/veiculos/default.aspx?v=c&p=53'
576             },
577             ]
578             },
579             );
580              
581             sub on_start {
582             my ( $self ) = @_;
583             foreach my $item ( @{ $self->startpage } ) {
584             $self->append( search => $item->{ url }, {
585             passed_key_values => {
586             tipo => $item->{ tipo },
587             referer => $item->{ url },
588             }
589             } );
590             }
591             }
592              
593             sub _headers {
594             my ( $self , $url, $form ) = @_;
595             return {
596             'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
597             'Accept-Encoding' => 'gzip, deflate',
598             'Accept-Language' => 'en-US,en;q=0.5',
599             'Cache-Control' => 'no-cache',
600             'Connection' => 'keep-alive',
601             'Content-Length' => length( POST('url...', [], Content => $form)->content ),
602             'Content-Type' => 'application/x-www-form-urlencoded; charset=utf-8',
603             'DNT' => '1',
604             'Host' => 'www.fipe.org.br',
605             'Pragma' => 'no-cache',
606             'Referer' => $url,
607             'User-Agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:20.0) Gecko/20100101 Firefox/20.0',
608             'X-MicrosoftAjax' => 'Delta=true',
609             };
610             }
611              
612             sub _form {
613             my ( $self, $args ) = @_;
614             return [
615             ScriptManager1 => $args->{ script_manager },
616             __ASYNCPOST => 'true',
617             __EVENTARGUMENT => '',
618             __EVENTTARGET => $args->{ event_target },
619             __EVENTVALIDATION => $args->{ event_validation },
620             __LASTFOCUS => '',
621             __VIEWSTATE => $args->{ viewstate },
622             ddlAnoValor => ( !exists $args->{ano} ) ? 0 : $args->{ ano },
623             ddlMarca => ( !exists $args->{marca} ) ? 0 : $args->{ marca },
624             ddlModelo => ( !exists $args->{modelo} ) ? 0 : $args->{ modelo },
625             ddlTabelaReferencia => 154,
626             txtCodFipe => '',
627             ];
628             }
629              
630             sub search {
631             my ( $self ) = @_;
632             my $marcas = $self->tree->findnodes( '//select[@name="ddlMarca"]/option' );
633             my $viewstate = $self->tree->findnodes( '//form[@id="form1"]//input[@id="__VIEWSTATE"]' )->get_node->attr('value');
634             my $event_validation = $self->tree->findnodes( '//form[@id="form1"]//input[@id="__EVENTVALIDATION"]' )->get_node->attr('value');
635             foreach my $marca ( $marcas->get_nodelist ) {
636             my $form = $self->_form( {
637             script_manager => 'UdtMarca|ddlMarca',
638             event_target => 'ddlMarca',
639             event_validation=> $event_validation,
640             viewstate => $viewstate,
641             marca => $marca->attr( 'value' ),
642             } );
643             $self->prepend( busca_marca => 'url' , {
644             passed_key_values => {
645             marca => $marca->as_text,
646             marca_id => $marca->attr( 'value' ),
647             tipo => $self->robot->reader->passed_key_values->{ tipo },
648             referer => $self->robot->reader->passed_key_values->{referer },
649             },
650             request => [
651             'POST',
652             $self->robot->reader->passed_key_values->{ referer },
653             {
654             headers => $self->_headers( $self->robot->reader->passed_key_values->{ referer } , $form ),
655             content => POST('url...', [], Content => $form)->content,
656             }
657             ]
658             } );
659             }
660             }
661              
662             sub busca_marca {
663             my ( $self ) = @_;
664             my ( $captura1, $viewstate ) = $self->robot->useragent->content =~ m/hiddenField\|__EVENTTARGET(.+)__VIEWSTATE\|([^\|]+)\|/g;
665             my ( $captura_1, $event_validation ) = $self->robot->useragent->content =~ m/hiddenField\|__EVENTTARGET(.+)__EVENTVALIDATION\|([^\|]+)\|/g;
666             my $modelos = $self->tree->findnodes( '//select[@name="ddlModelo"]/option' );
667             foreach my $modelo ( $modelos->get_nodelist ) {
668              
669              
670             next unless $modelo->as_text !~ m/selecione/ig;
671             my $kv={};
672             $kv->{ modelo_id } = $modelo->attr( 'value' );
673             $kv->{ modelo } = $modelo->as_text;
674             $kv->{ marca_id } = $self->robot->reader->passed_key_values->{ marca_id };
675             $kv->{ marca } = $self->robot->reader->passed_key_values->{ marca };
676             $kv->{ tipo } = $self->robot->reader->passed_key_values->{ tipo };
677             $kv->{ referer } = $self->robot->reader->passed_key_values->{ referer };
678             my $form = $self->_form( {
679             script_manager => 'updModelo|ddlModelo',
680             event_target => 'ddlModelo',
681             event_validation=> $event_validation,
682             viewstate => $viewstate,
683             marca => $kv->{ marca_id },
684             modelo => $kv->{ modelo_id },
685             } );
686             $self->prepend( busca_modelo => '', {
687             passed_key_values => $kv,
688             request => [
689             'POST',
690             $self->robot->reader->passed_key_values->{ referer },
691             {
692             headers => $self->_headers( $self->robot->reader->passed_key_values->{ referer } , $form ),
693             content => POST( 'url...', [], Content => $form )->content,
694             }
695             ]
696             } );
697             }
698             }
699              
700             sub busca_modelo {
701             my ( $self ) = @_;
702             my $anos = $self->tree->findnodes( '//select[@name="ddlAnoValor"]/option' );
703             foreach my $ano ( $anos->get_nodelist ) {
704             my $kv = {};
705             $kv->{ ano_id } = $ano->attr( 'value' );
706             $kv->{ ano } = $ano->as_text;
707             $kv->{ modelo_id } = $self->robot->reader->passed_key_values->{ modelo_id };
708             $kv->{ modelo } = $self->robot->reader->passed_key_values->{ modelo };
709             $kv->{ marca_id } = $self->robot->reader->passed_key_values->{ marca_id };
710             $kv->{ marca } = $self->robot->reader->passed_key_values->{ marca };
711             $kv->{ tipo } = $self->robot->reader->passed_key_values->{ tipo };
712             $kv->{ referer } = $self->robot->reader->passed_key_values->{ referer };
713             next unless $ano->as_text !~ m/selecione/ig;
714              
715             my ( $captura1, $viewstate ) = $self->robot->useragent->content =~ m/hiddenField\|__EVENTTARGET(.*)__VIEWSTATE\|([^\|]+)\|/g;
716             my ( $captura_1, $event_validation ) = $self->robot->useragent->content =~ m/hiddenField\|__EVENTTARGET(.*)__EVENTVALIDATION\|([^\|]+)\|/g;
717             my $form = $self->_form( {
718             script_manager => 'updAnoValor|ddlAnoValor',
719             event_target => 'ddlAnoValor',
720             event_validation=> $event_validation,
721             viewstate => $viewstate,
722             marca => $kv->{ marca_id },
723             modelo => $kv->{ modelo_id },
724             ano => $kv->{ ano_id },
725             } );
726              
727             $self->prepend( busca_ano => '', {
728             passed_key_values => $kv,
729             request => [
730             'POST',
731             $self->robot->reader->passed_key_values->{ referer },
732             {
733             headers => $self->_headers( $self->robot->reader->passed_key_values->{ referer } , $form ),
734             content => POST( 'url...', [], Content => $form )->content,
735             }
736             ]
737             } );
738             }
739             }
740              
741             sub busca_ano {
742             my ( $self ) = @_;
743             my $item = {};
744             $item->{ mes_referencia } = $self->tree->findvalue('//span[@id="lblReferencia"]') ;
745             $item->{ cod_fipe } = $self->tree->findvalue('//span[@id="lblCodFipe"]');
746             $item->{ marca } = $self->tree->findvalue('//span[@id="lblMarca"]');
747             $item->{ modelo } = $self->tree->findvalue('//span[@id="lblModelo"]');
748             $item->{ ano } = $self->tree->findvalue('//span[@id="lblAnoModelo"]');
749             $item->{ preco } = $self->tree->findvalue('//span[@id="lblValor"]');
750             $item->{ data } = $self->tree->findvalue('//span[@id="lblData"]');
751             $item->{ tipo } = $self->robot->reader->passed_key_values->{ tipo } ;
752             warn p $item;
753              
754             push( @{$self->veiculos}, $item );
755             }
756              
757             sub on_link {
758             my ( $self, $url ) = @_;
759             }
760              
761             sub on_finish {
762             my ( $self ) = @_;
763             warn "Terminou.... exportando dados.........";
764             $self->robot->writer->write( $self->veiculos );
765             }
766              
767             =head1 DESCRIPTION
768              
769             =head1 AUTHOR
770              
771             Hernan Lopes
772             CPAN ID: HERNAN
773             perldelux / movimentoperl
774             hernan@cpan.org
775             http://github.com/hernan604
776              
777             =head1 COPYRIGHT
778              
779             This program is free software; you can redistribute
780             it and/or modify it under the same terms as Perl itself.
781              
782             The full text of the license can be found in the
783             LICENSE file included with this module.
784              
785              
786             =head1 SEE ALSO
787              
788             perl(1).
789              
790             =cut
791              
792             #################### main pod documentation end ###################
793              
794              
795             1;
796             # The preceding line will help the module return a true value
797