File Coverage

blib/lib/Net/Parliament.pm
Criterion Covered Total %
statement 1 3 33.3
branch n/a
condition n/a
subroutine 1 1 100.0
pod n/a
total 2 4 50.0


line stmt bran cond sub pod time code
1             package Net::Parliament;
2 4     4   31229 use Moose;
  0            
  0            
3             use Net::Parliament::UserAgent;
4             use HTML::TableExtract qw/tree/;
5             use HTML::TreeBuilder;
6             use XML::Simple;
7              
8             =head1 NAME
9              
10             Net::Parliament - Scrape data from parl.gc.ca
11              
12             =cut
13              
14             our $VERSION = '0.03';
15              
16             =head1 SYNOPSIS
17              
18             This module will fetch HTML and XML from parl.gc.ca,
19             and then parse it into hashrefs.
20              
21             use Net::Parliament;
22              
23             my $parl = Net::Parliament->new(
24             parliament => 40,
25             session => 2,
26             );
27             my $members = $parl->members();
28              
29             =cut
30              
31             has '_members_base_url' => (
32             is => 'ro', isa => 'Str',
33             default => 'http://webinfo.parl.gc.ca/MembersOfParliament/',
34             );
35              
36             has 'members_html_url' => (
37             is => 'ro',
38             isa => 'Str',
39             default => sub {
40             shift->_members_base_url
41             . 'MainMPsCompleteList.aspx?TimePeriod=Current';
42             },
43             );
44              
45             has '_bills_base_domain' => (
46             is => 'ro', isa => 'Str',
47             default => 'http://www2.parl.gc.ca',
48             );
49              
50             has '_bills_base_url' => (
51             is => 'ro', isa => 'Str',
52             default => 'http://www2.parl.gc.ca/HouseBills/billsgovernment.aspx?',
53             );
54              
55             has '_bill_votes_base_url' => (
56             is => 'ro', isa => 'Str',
57             default => 'http://www2.parl.gc.ca/housebills/BillVotes.aspx?xml=True&SchemaVersion=1.0',
58             );
59              
60             has 'ua' => (
61             is => 'ro',
62             isa => 'Object',
63             handles => ['get'],
64             default => sub { Net::Parliament::UserAgent->new },
65             );
66              
67             has 'parliament' => (is => 'rw', isa => 'Int', required => 1);
68             has 'session' => (is => 'rw', isa => 'Int', required => 1);
69              
70             =head1 METHODS
71              
72             =head2 members(%opts)
73              
74             This method returns an arrayref containing a hashref for each
75             member of parliament. Fetching the data is cached via
76             Net::Parliament::UserAgent.
77              
78             Options:
79              
80             =over 4
81              
82             =item extended
83              
84             If set to true, extra data from the member's home page will
85             be fetched. This takes much longer.
86              
87             =item limit
88              
89             Only return this number of results. Useful for testing.
90              
91             =back
92              
93             =cut
94              
95             sub members {
96             my $self = shift;
97             my %opts = @_;
98              
99             my $members_page = $self->get($self->members_html_url);
100              
101             my $te = HTML::TableExtract->new(
102             headers => [ 'Member of Parliament', 'Constituency',
103             'Province/Territory', 'Caucus' ],
104             );
105             $te->parse($members_page);
106              
107             my ($member_table) = $te->tables;
108             my $table_tree = $member_table->tree;
109              
110             my @members;
111             my $max = $opts{limit} || $table_tree->maxrow;
112             $max = $table_tree->maxrow if $max > $table_tree->maxrow;
113             for my $i (1 .. $max) {
114             my $row = $table_tree->row($i);
115             my @cols =$row->look_down('_tag', 'td');
116              
117             my $member = {};
118             eval {
119             $member->{member_url}
120             = $self->_members_base_url
121             . $cols[0]->find_by_tag_name('a')->attr('href');
122             $member->{member_name}
123             = $cols[0]->find_by_tag_name('a')->content->[0];
124             $member->{constituency}
125             = $cols[1]->find_by_tag_name('a')->content->[0];
126             $member->{province} = $cols[2]->content->[0];
127             $member->{caucus} = $cols[3]->content->[0];
128             if (ref($member->{caucus})) {
129             $member->{caucus} = $member->{caucus}->content->[0];
130             }
131             if ($member->{member_url} =~ m/Key=(\d+)/) {
132             $member->{member_id} = $1;
133             }
134             };
135             if ($@) {
136             warn "Error parsing row: $@";
137             $row->dump;
138             }
139              
140             $member = $self->_load_member($member)
141             if $opts{extended};
142             push @members, $member;
143             }
144              
145             return \@members;
146             }
147              
148             =head2 bills()
149              
150             This method returns an arrayref containing a hashref for each
151             Government Bill raised in parliament.
152              
153             =cut
154              
155             sub bills {
156             my $self = shift;
157             my $p = $self->parliament;
158             my $s = $self->session;
159              
160             my $url = $self->_bills_base_url . "Parl=$p&Ses=$s";
161             my $html = $self->get($url);
162             my $block_oh_html = <<EOT;
163             <div class="BillBlock BillBlockOdd" id="divBillBlockC2">
164             <span class="BillNumberCell">C-2</span>
165             <div class="BillSummary">
166             <span class="BillLongText">An Act to amend the Criminal Code and to make consequential amendments to other Acts</span>
167             <div class="BillSponsor"><a class="WebOption" onclick="GetWebOptions('PRISM','Affiliation',105824,'1');return false;" onmouseout="inDiv=0;setTimeout('TimeoutHide()',1000);return false;" href="/HousePublications/GetWebOptionsCallBack.aspx?SourceSystem=PRISM&amp;ResourceType=Affiliation&amp;ResourceID=105824&amp;language=1&amp;DisplayMode=2">The Minister of Justice</a></div>
168             <div>
169             <div><a class="BillVersionLink" href="/HouseBills/StaticLinkRedirector.aspx?Language=e&amp;LinkTitle=%28C-2%29%20Legislative%20Summary&amp;RedirectUrl=%2fSites%2fLOP%2fLEGISINFO%2findex.asp%3fList%3dls%26Language%3dE%26Query%3d5273%26Session%3d15&amp;RefererUrl=X&amp;StatsEnabled=true">Legislative Summary</a></div>
170             <div><a class="BillVersionLink" href="/HousePublications/Publication.aspx?DocId=3078412&amp;Language=e&amp;Mode=1">First Reading</a></div>
171             <div><a class="BillVersionLink" href="/HousePublications/Publication.aspx?DocId=3151626&amp;Language=e&amp;Mode=1">As passed by the House of Commons</a></div>
172             <div><a class="BillVersionLink" href="/HousePublications/Publication.aspx?DocId=3320180&amp;Language=e&amp;Mode=1">Royal Assent</a></div>
173             <div><a class="BillVersionLink" href="/housebills/BillVotes.aspx?Language=e&amp;Mode=1&amp;Parl=39&amp;Ses=2&amp;Bill=C2">Votes</a></div>
174             </div>
175             </div>
176             </div>
177             EOT
178              
179             my $tree = HTML::TreeBuilder->new_from_content($html);
180             my @billblocks = $tree->look_down(class => qr/\bBillBlock\b/);
181             my @bills;
182             for my $b (@billblocks) {
183             my $bill = {
184             parliament => $p,
185             session => $s,
186             name => $b->look_down(class => 'BillNumberCell')->content->[0],
187             summary => $b->look_down(class => 'BillLongText')->content->[0],
188             sponsor_title =>
189             $b->look_down(class => 'BillSponsor')->content->[0]
190             };
191              
192             if (ref($bill->{sponsor_title})) {
193             my $bs = $bill->{sponsor_title};
194             $bill->{sponsor_title} = $bs->content->[0];
195             my $url = $bs->look_down(
196             _tag => 'a')->attr('href');
197             if ($url =~ m/ResourceID=(\d+)/) {
198             $bill->{sponsor_id} = $1;
199             }
200             }
201              
202             my @links = $b->look_down(class => 'BillVersionLink');
203             for my $link (@links) {
204             my $url = $self->_bills_base_domain . $link->attr('href');
205             $url =~ s/\s/%20/g;
206             push @{ $bill->{links} }, { $link->content->[0] => $url };
207             }
208              
209             push @bills, $bill;
210             }
211             return \@bills;
212             }
213              
214             =head2 bill_votes( $bill_name )
215              
216             This method returns an arrayref containing a hashref for each
217             vote on the specified Bill.
218              
219             =cut
220              
221             sub bill_votes {
222             my $self = shift;
223             my $bill = shift or die "Must specify a bill name";
224             $bill =~ s/-//;
225             my $p = $self->parliament;
226             my $s = $self->session;
227              
228             my $url = $self->_bill_votes_base_url . "&Parl=$p&Ses=$s&Bill=$bill";
229             my $xml = XMLin($self->get($url));
230            
231             return [] unless $xml->{Vote};
232             return [ $xml->{Vote} ] if ref($xml->{Vote}) eq 'HASH';
233             return $xml->{Vote};
234             }
235              
236             =head2 member_votes( $member_id )
237              
238             This method returns an arrayref containing a hashref for each
239             vote made by the specified member.
240              
241             =cut
242              
243             sub member_votes {
244             my $self = shift;
245             my $member = shift or die "Must specify a member ID";
246             my $p = $self->parliament;
247             my $s = $self->session;
248              
249             my $url = $self->_members_base_url
250             . "ProfileMP.aspx?key=$member&SubSubject=1006&"
251             . "FltrParl=$p&FltrSes=$s&VoteType=1&"
252             . 'xml=true&SchemaVersion=1.0';
253             my $xml = XMLin($self->get($url));
254             return $xml->{Vote};
255             }
256              
257             sub _load_member {
258             my $self = shift;
259             my $member = shift;
260             my $member_url = $member->{member_url};
261              
262             my $content = $self->get($member_url);
263             eval {
264             $member->{profile_photo_url} = $self->_extract_photo_url($content);
265             };
266             if ($@) {
267             die "Couldn't extract profile photo from $member_url: $@\n";
268             }
269             eval {
270             $self->_extract_more_details($content, $member);
271             };
272             if ($@) {
273             die "Couldn't extract details from $member_url: $@\n";
274             }
275              
276             return $member;
277             }
278              
279             sub _extract_photo_url {
280             my $self = shift;
281             my $content = shift;
282              
283             my $te = HTML::TableExtract->new( depth => 3, count => 1);
284             $te->parse($content);
285              
286             my $profile_img;
287             eval {
288             my ($member_table) = $te->tables;
289             my $row = $member_table->tree->row(1);
290             ($profile_img) = $row->look_down('_tag', 'img');
291             };
292             if ($@) {
293             die "Error finding profile image in content:\n$content\n\n";
294             }
295             return $self->_members_base_url . $profile_img->attr('src');
296             }
297              
298             sub _extract_more_details {
299             my $self = shift;
300             my $content = shift;
301             my $member = shift;
302              
303             my $te = HTML::TableExtract->new( depth => 5, count => 6);
304             $te->parse($content);
305              
306             my ($details) = $te->tables;
307             my $tree = $details->tree;
308              
309             for my $row (map { $tree->row($_) } 5 .. 8) {
310             eval {
311             my ($key, $val)
312             = map { $_->content->[0]->content->[0] }
313             $row->look_down('_tag', 'td');
314              
315             $key =~ s/:\*?$//;
316             $key = lc($key);
317              
318             if ($key eq 'web site') {
319             $val = 'http://' . $val;
320             }
321              
322             $member->{$key} = $val;
323             };
324             }
325             }
326              
327             =head1 AUTHOR
328              
329             Luke Closs, C<< <cpan at 5thplane.com> >>
330              
331             =head1 BUGS
332              
333             Please report any bugs or feature requests to C<bug-net-parliament at rt.cpan.org>, or through
334             the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Net-Parliament>. I will be notified, and then you'll
335             automatically be notified of progress on your bug as I make changes.
336              
337             =head1 SUPPORT
338              
339             You can find documentation for this module with the perldoc command.
340              
341             perldoc Net::Parliament
342              
343             You can also look for information at:
344              
345             =over 4
346              
347             =item * RT: CPAN's request tracker
348              
349             L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Net-Parliament>
350              
351             =item * AnnoCPAN: Annotated CPAN documentation
352              
353             L<http://annocpan.org/dist/Net-Parliament>
354              
355             =item * CPAN Ratings
356              
357             L<http://cpanratings.perl.org/d/Net-Parliament>
358              
359             =item * Search CPAN
360              
361             L<http://search.cpan.org/dist/Net-Parliament/>
362              
363             =back
364              
365             =head1 ACKNOWLEDGEMENTS
366              
367             Thanks to parl.gc.ca for the parts of their site in XML format.
368              
369             =head1 COPYRIGHT & LICENSE
370              
371             Copyright 2009 Luke Closs, all rights reserved.
372              
373             This program is free software; you can redistribute it and/or modify it
374             under the same terms as Perl itself.
375              
376             =cut
377              
378             1;