| blib/lib/WWW/Google/News/TW.pm | |||
|---|---|---|---|
| Criterion | Covered | Total | % |
| statement | 58 | 118 | 49.1 |
| branch | 6 | 22 | 27.2 |
| condition | n/a | ||
| subroutine | 10 | 11 | 90.9 |
| pod | 2 | 3 | 66.6 |
| total | 76 | 154 | 49.3 |
| line | stmt | bran | cond | sub | pod | time | code | |
|---|---|---|---|---|---|---|---|---|
| 1 | package WWW::Google::News::TW; | |||||||
| 2 | ||||||||
| 3 | 3 | 3 | 108891 | use utf8; | ||||
| 3 | 25 | |||||||
| 3 | 19 | |||||||
| 4 | 3 | 3 | 97 | use strict; | ||||
| 3 | 7 | |||||||
| 3 | 105 | |||||||
| 5 | 3 | 3 | 18 | use warnings; | ||||
| 3 | 10 | |||||||
| 3 | 310 | |||||||
| 6 | ||||||||
| 7 | require Exporter; | |||||||
| 8 | ||||||||
| 9 | our @ISA = qw(Exporter); | |||||||
| 10 | our @EXPORT_OK = qw(get_news get_news_for_topic get_news_for_category); | |||||||
| 11 | our $VERSION = '0.12'; | |||||||
| 12 | ||||||||
| 13 | 3 | 3 | 16 | use Carp; | ||||
| 3 | 6 | |||||||
| 3 | 304 | |||||||
| 14 | 3 | 3 | 2710 | use LWP; | ||||
| 3 | 355206 | |||||||
| 3 | 146 | |||||||
| 15 | 3 | 3 | 47 | use URI::Escape; | ||||
| 3 | 115 | |||||||
| 3 | 9846 | |||||||
| 16 | 3 | 3 | 13900 | use Encode; | ||||
| 3 | 49212 | |||||||
| 3 | 5302 | |||||||
| 17 | ||||||||
| 18 | sub get_news { | |||||||
| 19 | # Web version: http://news.google.com.tw/news?ned=tw | |||||||
| 20 | # plain text version : http://news.google.com.tw/news?ned=ttw | |||||||
| 21 | 1 | 1 | 1 | 9 | my $url = 'http://news.google.com.tw/news?ned=ttw'; | |||
| 22 | 1 | 4777 | my $ua = LWP::UserAgent->new; | |||||
| 23 | 1 | 4049 | $ua->agent('Mozilla/5.0'); | |||||
| 24 | 1 | 82 | my $response = $ua->get($url); | |||||
| 25 | 1 | 658921 | my $results = {}; | |||||
| 26 | 1 | 50 | 6 | return unless $response->is_success; | ||||
| 27 | ||||||||
| 28 | 1 | 14 | my $re1 = ' | (.*?)'; | ||||
| 29 | 1 | 4 | my $re2 = '([^<]*) '. |
|||||
| 30 | '([^<]*)'. | |||||||
| 31 | '\s? '. |
|||||||
| 32 | '([^<]*)...'; | |||||||
| 33 | ||||||||
| 34 | 1 | 11 | my $content = $response->decoded_content; | |||||
| 35 | 1 | 50 | 21660 | $content = $response->content if (not defined $content); | ||||
| 36 | 1 | 2588 | my @sections = split /($re1)/m,$content; | |||||
| 37 | 1 | 6 | my $current_section = ''; | |||||
| 38 | 1 | 5 | foreach my $section (@sections) { | |||||
| 39 | 1 | 50 | 137 | if ($section =~ m/$re1/m) { | ||||
| 40 | 0 | 0 | $current_section = $1; | |||||
| 41 | 0 | 0 | $current_section =~ s/ //g; # or put this (.*?)(?: )? in re1 | |||||
| 42 | } else { | |||||||
| 43 | 1 | 1 | 2073 | my @stories = split /($re2)/mi,$section; | ||||
| 1 | 14 | |||||||
| 1 | 3 | |||||||
| 1 | 21 | |||||||
| 44 | 1 | 103020 | foreach my $story (@stories) { | |||||
| 45 | 1 | 50 | 34847 | if ($story =~ m/$re2/mi) { | ||||
| 46 | 0 | 0 | 0 | if (!(exists($results->{$current_section}))) { | ||||
| 47 | 0 | 0 | $results->{$current_section} = []; | |||||
| 48 | } | |||||||
| 49 | 0 | 0 | my $story_h = {}; | |||||
| 50 | 0 | 0 | my( $url, $headline, $source, $update_time, $summary ) = ( $1, $2, $3, $4, $5 ); | |||||
| 51 | 0 | 0 | $story_h->{url} = $url; | |||||
| 52 | 0 | 0 | $story_h->{headline} = $headline; | |||||
| 53 | 0 | 0 | $story_h->{source} = $source; | |||||
| 54 | 0 | 0 | $story_h->{source} =~ s/ -//g; | |||||
| 55 | 0 | 0 | $story_h->{update_time} = $update_time; | |||||
| 56 | 0 | 0 | $story_h->{summary} = $summary; | |||||
| 57 | 0 | 0 | push(@{$results->{$current_section}},$story_h); | |||||
| 0 | 0 | |||||||
| 58 | } | |||||||
| 59 | } | |||||||
| 60 | } | |||||||
| 61 | } | |||||||
| 62 | 1 | 321 | return $results; | |||||
| 63 | } | |||||||
| 64 | ||||||||
| 65 | sub get_news_for_topic { | |||||||
| 66 | ||||||||
| 67 | 1 | 1 | 1 | 16 | my $topic = uri_escape( $_[0] ); | |||
| 68 | ||||||||
| 69 | 1 | 78 | my @results = (); | |||||
| 70 | 1 | 5 | my $url = "http://news.google.com.tw/news?hl=zh-TW&ned=ttw&q=$topic"; | |||||
| 71 | 1 | 13 | my $ua = LWP::UserAgent->new(); | |||||
| 72 | 1 | 3646 | $ua->agent('Mozilla/5.0'); | |||||
| 73 | ||||||||
| 74 | 1 | 75 | my $response = $ua->get($url); | |||||
| 75 | 1 | 50 | 635650 | return unless $response->is_success; | ||||
| 76 | ||||||||
| 77 | 1 | 15 | my $re1 = ' | (.*?)©\d{4} Google'; | ||||
| 78 | 1 | 4 | my $re2 = '(.*?) '. |
|||||
| 79 | '([^<]*)'. | |||||||
| 80 | '\s? '. |
|||||||
| 81 | '(.*?)...'; | |||||||
| 82 | ||||||||
| 83 | 1 | 11 | my $content = $response->decoded_content; | |||||
| 84 | 1 | 50 | 12182 | $content = $response->content if (not defined $content); | ||||
| 85 | 1 | 135 | my( $section ) = ( $content =~ m/$re1/s ); | |||||
| 86 | 1 | 158 | $section =~ s/\n//g; | |||||
| 87 | 1 | 120 | my @stories = split /($re2)/mi,$section; | |||||
| 88 | ||||||||
| 89 | 1 | 4 | foreach my $story (@stories) { | |||||
| 90 | 0 | 0 | 0 | if ($story =~ m/$re2/i) { | ||||
| 91 | 0 | 0 | my $story_h = {}; | |||||
| 92 | ||||||||
| 93 | 0 | 0 | my( $url, $headline, $source, $update_time, $summary ) = ( $1, $2, $3, $4, $5 ); | |||||
| 94 | 0 | 0 | $source =~ s/ / /g; | |||||
| 95 | 0 | 0 | $source =~ s/\s+/ /g; | |||||
| 96 | 0 | 0 | $update_time =~ s/ / /g; | |||||
| 97 | 0 | 0 | $update_time =~ s/\s+/ /g; | |||||
| 98 | 0 | 0 | $update_time =~ s/-//g; | |||||
| 99 | 0 | 0 | $headline =~ s#<.+?>##gi; | |||||
| 100 | 0 | 0 | $summary =~ s#<.+?>##gi; | |||||
| 101 | ||||||||
| 102 | 0 | 0 | $story_h->{url} = $url; | |||||
| 103 | 0 | 0 | $story_h->{headline} = $headline; | |||||
| 104 | 0 | 0 | $story_h->{source} = $source; | |||||
| 105 | 0 | 0 | $story_h->{update_time} = $update_time; | |||||
| 106 | 0 | 0 | $story_h->{summary} = $summary; | |||||
| 107 | ||||||||
| 108 | 0 | 0 | push(@results,$story_h); | |||||
| 109 | ||||||||
| 110 | } | |||||||
| 111 | } | |||||||
| 112 | ||||||||
| 113 | 1 | 178 | return \@results; | |||||
| 114 | ||||||||
| 115 | } | |||||||
| 116 | ||||||||
| 117 | sub get_news_for_category { | |||||||
| 118 | # Web version: http://news.google.com.tw/news?ned=tw | |||||||
| 119 | # plain text version : http://news.google.com.tw/news?ned=ttw | |||||||
| 120 | 0 | 0 | 0 | 0 | my $topic = $_[0]; | |||
| 121 | 0 | 0 | my $url = 'http://news.google.com.tw/news?ned=ttw&topic='.$topic; | |||||
| 122 | 0 | 0 | my $ua = LWP::UserAgent->new; | |||||
| 123 | 0 | 0 | $ua->agent('Mozilla/5.0'); | |||||
| 124 | 0 | 0 | my $response = $ua->get($url); | |||||
| 125 | 0 | 0 | my $results = []; | |||||
| 126 | 0 | 0 | 0 | return unless $response->is_success; | ||||
| 127 | ||||||||
| 128 | 0 | 0 | my $re1 = '
|
|||||
| 129 | 0 | 0 | my $re2 = '([^<]*) '. |
|||||
| 130 | '([^<]*)'. | |||||||
| 131 | '\s? '. |
|||||||
| 132 | '([^<]*)....*?'. | |||||||
| 133 | ']*)> |
|||||||
| 134 | 0 | 0 | my @sections = split /($re1)/s,$response->content; | |||||
| 135 | 0 | 0 | my $current_section = ''; | |||||
| 136 | 0 | 0 | foreach my $section (@sections) { | |||||
| 137 | 0 | 0 | 0 | if ($section =~ m/$re1/s) { | ||||
| 138 | 0 | 0 | $current_section = $1; | |||||
| 139 | 0 | 0 | my @stories = split /($re2)/si,$current_section; | |||||
| 140 | 0 | 0 | foreach my $story (@stories) { | |||||
| 141 | 0 | 0 | 0 | if ($story =~ m/$re2/si) { | ||||
| 142 | 0 | 0 | my $story_h = {}; | |||||
| 143 | 0 | 0 | my( $url, $headline, $source, $update_time, $summary, $related_url, $related_news) = | |||||
| 144 | ( $1, $2, $3, $4, $5, $6, $7 ); | |||||||
| 145 | 0 | 0 | $story_h->{url} = $url; | |||||
| 146 | 0 | 0 | $story_h->{headline} = $headline; | |||||
| 147 | 0 | 0 | $story_h->{source} = $source; | |||||
| 148 | 0 | 0 | $story_h->{source} =~ s/ -//g; | |||||
| 149 | 0 | 0 | $story_h->{update_time} = $update_time; | |||||
| 150 | 0 | 0 | $story_h->{summary} = $summary; | |||||
| 151 | 0 | 0 | $story_h->{related_url} = $related_url; | |||||
| 152 | 0 | 0 | $story_h->{related_news} = $related_news; | |||||
| 153 | 0 | 0 | push(@{$results},$story_h); | |||||
| 0 | 0 | |||||||
| 154 | } | |||||||
| 155 | } | |||||||
| 156 | } | |||||||
| 157 | } | |||||||
| 158 | 0 | 0 | return $results; | |||||
| 159 | } | |||||||
| 160 | 1; | |||||||
| 161 | ||||||||
| 162 | __END__ |