File Coverage

lib/WWW/Scraper/F1.pm
Criterion Covered Total %
statement 132 149 88.5
branch 21 42 50.0
condition 8 16 50.0
subroutine 20 20 100.0
pod 2 6 33.3
total 183 233 78.5


line stmt bran cond sub pod time code
1             package WWW::Scraper::F1;
2             {
3             $WWW::Scraper::F1::VERSION = '0.007';
4             }
5              
6 1     1   136156 use v5.14;
  1         4  
  1         47  
7 1     1   7 use strict;
  1         2  
  1         34  
8 1     1   6 use warnings;
  1         9  
  1         44  
9 1     1   5 use warnings qw(FATAL utf8);
  1         2  
  1         45  
10 1     1   6 use open qw(:std :utf8);
  1         3  
  1         9  
11 1     1   149 use charnames qw(:full :short);
  1         3  
  1         10  
12              
13 1     1   1023 use parent qw(Exporter);
  1         325  
  1         6  
14 1     1   997 use Encode 2.47;
  1         55854  
  1         127  
15 1     1   3863 use HTML::TreeBuilder 5.03;
  1         118052  
  1         21  
16 1     1   1887 use LWP 6.04;
  1         124173  
  1         118  
17 1     1   1688 use DateTime::Format::Natural 1.00;
  1         1202555  
  1         119  
18 1     1   3969 use DateTime::Format::Duration 1.03;
  1         24594  
  1         203  
19 1     1   4164 use Time::Piece 1.20;
  1         58687  
  1         9  
20 1     1   139 use Storable 2.39;
  1         24  
  1         51535  
21              
22             our @EXPORT = qw(get_upcoming_race get_top_championship);
23              
24             sub get_upcoming_race {
25 2     2 1 152617 my $options = shift;
26 2   50     76 my $total_info = &get_info( $options->{cache} // "1" , $options->{test});
27              
28 2         6 my $race_info = $total_info->{'race_info'};
29 2         4 my $output = '';
30 2 50       12 if ( !defined($race_info) ){
31 0         0 return undef;
32             }
33              
34 2         5 my $now = $race_info->{'now'};
35 2         6 my $dt = $race_info->{'time'};
36              
37             #convert datetime objects to Time::Piece objects, for actual day calculation (datetime object seem to want to convert 41 days to 1 month and some days)
38 2         23 my $t1 =
39             Time::Piece->strptime( $dt->strftime("%y %m %d %T"), "%y %m %d %T" );
40 2         1669 my $t2 =
41             Time::Piece->strptime( $now->strftime("%y %m %d %T"), "%y %m %d %T" );
42              
43 2         353 my $diff = $t1 - $t2;
44 2         296 my $diff_days = int $diff->days; #use Time::Piece to calculate days left
45 2         89 $diff = $dt - $now;
46              
47             #check if days or hours is 0 to prevent output like this ( 12 days 0 hours) this becomes just (12 days)
48 2 50       3383 my $until_race_time = sprintf( "%s%s",
    50          
49             ( $diff_days > 0 ) ? "$diff_days days " : "",
50 2         89 ( $diff->hours > 0 ) ? "${\$diff->hours} hours" : "" );
51 2 50       61 if ( $now > $dt ) {
52 2         141 $until_race_time .= " ago";
53             }
54             $output = {
55 2         11 'city' => $race_info->{city},
56             'country' => $race_info->{country},
57             'time' => $dt->strftime("%d/%m/%y %T"),
58             'countdown' => $until_race_time,
59             };
60              
61 2         683 return $output;
62             }
63              
64             sub get_top_championship {
65 3     3 1 651 my $options = shift;
66 3   50     34 $options->{points} ||= "yes";
67 3   100     20 $options->{length} ||= 5;
68 3   50     21 $options->{cache} ||= "1";
69 3 50       14 return if $options->{length} < 1;
70 3         15 my $total_info = &get_info( $options->{cache}, $options->{test} );
71 3         12 my $championship_table = $total_info->{'championship_info'};
72 3 50       18 if( !defined($championship_table) ){
73 0         0 return undef;
74             }
75              
76 3         13 my @ra = ();
77 3         23 for ( my $i = 1 ; $i <= $options->{length} ; $i++ ) {
78 20         92 my $tuple = {
79             'pos' => $i,
80             'driver' => $championship_table->[$i]->{'driver'},
81             'points' => $championship_table->[$i]->{'points'}
82             };
83 20         66 push @ra, $tuple;
84             }
85 3         123 return \@ra;
86             }
87              
88             sub get_info {
89 5     5 0 12 my $cache = shift;
90 5         14 my $testing = shift;
91 5         11 my $cache_name = "f1.cache";
92 5         12 my ( $cache_content, $total_info );
93 5         77 my $now = DateTime->now( time_zone => 'local' );
94 5 50 33     345637 if ( $cache && -e $cache_name && !$testing ) { #cache file exists
      33        
95 0         0 $cache_content = retrieve($cache_name);
96              
97 0 0       0 if ( $now > $cache_content->{'race_info'}->{'time'} ) {
98 0         0 my $web_content = &build_from_internet();
99 0 0       0 return undef if not $web_content;
100 0         0 $total_info = &extract_info_from_web_content($web_content);
101 0         0 store $total_info, $cache_name;
102             }
103             else {
104 0         0 $total_info = $cache_content;
105             }
106              
107             }
108             else { #get info from web, extract info and put it in a cacheble hash
109 5         32 my $web_content = &build_from_internet($testing);
110 5 50       19 return undef if not $web_content;
111 5         22 $total_info = &extract_info_from_web_content($web_content);
112 5         94 store $total_info, $cache_name;
113             }
114 5         406473 $total_info->{'race_info'}->{'now'} = $now;
115 5         30 return $total_info;
116             }
117              
118             sub build_from_internet {
119 5   50 5 0 22 my $test = shift || undef;
120 5         15 my %info = ();
121 5         9 my ($race_info_content, $championship_content);
122 5 50       15 if( $test ){
123 5         25 $race_info_content = decode_utf8( do_GET($test->{upcoming}) );
124             }else{
125 0         0 $race_info_content = decode_utf8( do_GET("http://www.formula1.com/default.html") );
126             }
127 5 50       24557 if ( !$race_info_content ) { #get failed (no internet connection)
128 0         0 print "race_info: Could not fetch form inet and no cache\n";
129 0         0 $info{'race_content'} = undef;
130             }else{
131 5         30 $info{'race_content'} = $race_info_content;
132             }
133              
134 5         74 my $now = DateTime->now();
135 5 50       3290 if( $test ){
136 5         64 $championship_content = decode_utf8( do_GET( $test->{championship } ) );
137             }else{
138 0         0 $championship_content = decode_utf8( do_GET( "http://www.formula1.com/results/driver/" . $now->year ) );
139             }
140 5 50       6838 if ( !$championship_content ) { #get failed (no internet connection)
141 0         0 print "championship: Could not fetch from (no results yet this season?) and no cache\n";
142 0         0 $info{'championship_content'} = undef;
143             }
144 5         19 $info{'championship_content'} = $championship_content;
145             # open( my $rc_info, "<", $test->{upcoming} ) or die "Could not open $test->{upcoming}: $!";
146             # open( my $chmp_info, "<", $test->{championship} ) or die "Could not open $test->{upcoming}: $!";
147 5         79 return \%info;
148             }
149              
150             sub extract_info_from_web_content {
151 5     5 0 12 my $web_content = shift;
152 5         10 my $total_info = {};
153             ################ extract time and place info from web_content
154 5         7 my $race_info;
155 5         66 my $root = HTML::TreeBuilder->new;
156              
157 5 50       2789 if( $web_content->{'race_content'} ){
158             #race time extraction
159 5         44279 foreach my $line ( split( '\n', $web_content->{'race_content'} ) ) {
160 10725 100       48707 if ( $line =~ m/grand_prix\[0\]\.sessions/ ) {
161 5         70 $line =~ m/'Race','(.+)'/;
162 5         119 my $parser = DateTime::Format::Natural->new( time_zone => 'GMT' );
163 5         130373 my $dt = $parser->parse_datetime( $parser->extract_datetime($1) );
164 5         1950616 $dt->set_time_zone( DateTime::TimeZone->new( name => 'local' ) ); #convert timezone to local
165 5         60979 $race_info->{time} = $dt;
166             }
167             }
168 5         2274 $root->parse( $web_content->{'race_content'} );
169 5         3907982 $race_info->{country} =
170             ucfirst
171             lc $root->find_by_attribute( "id", "country_name" )->as_trimmed_text();
172 5         99949 $race_info->{city} =
173             $root->find_by_attribute( "id", "city_name" )->as_trimmed_text();
174              
175 5         77880 $race_info->{city} =~ s/[\P{alpha}]//;
176 5         45 $race_info->{city} = ucfirst lc
177             $race_info->{city}; #strip the html gunk, by removing all Non-alpha chars
178              
179 5         30 $total_info->{'race_info'} = $race_info;
180             }else{
181 0         0 $total_info->{'race_info'} = undef;
182             }
183              
184             ################ extract championship info from web_content
185 5 50       28 if( $web_content->{'championship_content'} ){
186 5         444 $root->parse( $web_content->{'championship_content'} );
187              
188 5         2331817 my $table = $root->look_down(
189             "_tag" => "table",
190             "class" => "raceResults"
191             );
192 5         140601 my @rows = $table->look_down( "_tag" => "tr" );
193 5         23555 for my $row (@rows) {
194 130         7887 my @columns = $row->look_down( "_tag", "td" );
195 130 100       35606 if (@columns) {
196 125         429 $total_info->{'championship_info'}->[ $columns[0]->as_text() ]
197             ->{'driver'} = $columns[1]->as_text();
198 125         8842 $total_info->{'championship_info'}->[ $columns[0]->as_text() ]
199             ->{'points'} = $columns[4]->as_text();
200             }
201             }
202             }else{
203 0         0 $total_info->{'championship_info'} = undef;
204             }
205 5         36591 return $total_info;
206             }
207              
208             sub do_GET {
209 10     10 0 19 my $browser;
210 10 50       167 $browser = LWP::UserAgent->new unless $browser;
211 10         20327 my $resp = $browser->get(@_);
212 10 50       121115 return ( $resp->content, $resp->status_line, $resp->is_success, $resp )
213             if wantarray;
214 10 50       57 return unless $resp->is_success;
215 10         159 return $resp->content;
216             }
217             1;
218              
219             __END__