File Coverage

blib/lib/Scrape/USPS/ZipLookup.pm
Criterion Covered Total %
statement 54 169 31.9
branch 6 44 13.6
condition 5 8 62.5
subroutine 12 15 80.0
pod 0 7 0.0
total 77 243 31.6


line stmt bran cond sub pod time code
1             #
2             # ZipLookup.pm
3             #
4             # Perl 5 module to standardize U.S. postal addresses by referencing
5             # the U.S. Postal Service's web site:
6             #
7             # http://www.usps.com/zip4/
8             #
9             # BE SURE TO READ, UNDERSTAND, AND ABIDE BY THE TERMS OF USE FOR THE
10             # USPS WEB SITE. LINKS ARE PROVIDED IN THE TERMS OF USE SECTION IN THE
11             # DOCUMENTATION OF THIS PROGRAM, WHICH MAY BE FOUND AT THE END OF THIS
12             # SOURCE CODE FILE.
13             #
14             # Copyright (C) 1999-2012 Gregor N. Purdy, Sr. All rights reserved.
15             # This program is free software. It is subject to the same license as Perl.
16             #
17             # [ $Id$ ]
18             #
19              
20             package Scrape::USPS::ZipLookup;
21              
22 1     1   3269 use strict;
  1         3  
  1         131  
23 1     1   7 use warnings;
  1         3  
  1         45  
24 1     1   33132 use encoding 'utf-8';
  1         145044  
  1         13  
25              
26             our $VERSION = '2.6';
27              
28 1     1   7127 use LWP::UserAgent; # To communicate with USPS and get HTML
  1         60745  
  1         59  
29 1     1   1226 use HTTP::Request::Common;
  1         2748  
  1         109  
30 1     1   1313 use HTML::TreeBuilder::XPath; # To parse HTML
  1         152877  
  1         30  
31 1     1   52 use XML::XPathEngine; # To extract data
  1         2  
  1         30  
32              
33 1     1   1996 use Scrape::USPS::ZipLookup::Address;
  1         5  
  1         38  
34              
35             my $start_url = 'https://tools.usps.com/go/ZipLookupAction!input.action?mode=0';
36              
37              
38             #
39             # new()
40             #
41              
42             sub new
43             {
44 1     1 0 51 my $class = shift;
45 1         7 my $self = bless {
46             VERBOSE => 0,
47             }, $class;
48              
49 1         4 return $self;
50             }
51              
52              
53             #
54             # verbose()
55             #
56              
57             sub verbose
58             {
59 21     21 0 62 my $self = shift;
60              
61 21 100       72 if (@_) {
62 1         27 $self->{VERBOSE} = $_[0];
63 1         4 return $_[0];
64             } else {
65 20         102 return $self->{VERBOSE};
66             }
67             }
68              
69              
70             #
71             # dump()
72             #
73              
74             sub dump
75             {
76 0     0 0 0 my $self = shift;
77 0         0 my ($response) = @_;
78              
79 0         0 my $request = $response->request;
80            
81 0         0 print "-" x 79, "\n";
82 0         0 print "HTTP Request:\n";
83 0         0 $request->dump;
84            
85 0         0 print "-" x 79, "\n";
86 0         0 print "HTTP Response:\n";
87 0         0 $response->dump;
88             }
89              
90             #
91             # std_inner()
92             #
93             # The inner portion of the process, so it can be shared by
94             # std_addr() and std_addrs().
95             #
96              
97             sub std_inner
98             {
99 5     5 0 9 my $self = shift;
100              
101             #
102             # Turn the input into an Address instance:
103             #
104              
105 5         59 my $addr = Scrape::USPS::ZipLookup::Address->new(@_);
106              
107 5 50       15 if ($self->verbose) {
108 0         0 print ' ', '_' x 77, ' ', "\n";
109 0         0 print '/', ' ' x 77, '\\', "\n";
110 0         0 $addr->dump("Input");
111 0         0 print "\n";
112             }
113              
114 5         11 my $response = undef;
115            
116             #
117             # Submit the form to the USPS web server:
118             #
119             # Unless we are in verbose mode, we make the WWW::Mechanize user agent be
120             # quiet. At the time this was written [2003-01-28], it generates a warning
121             # about the "address" form field being read-only if its not in quiet mode.
122             #
123             # We set the form's Selection field to "1" to indicate that we are doing
124             # regular zip code lookup.
125             #
126              
127 5         50 my $ua = LWP::UserAgent->new(cookie_jar => { }); # We need a cookie jar for USPS to let is through
128 5         808442 $response = $ua->get($start_url);
129            
130 5 50       5067136 if ($self->verbose) {
131 0         0 $self->dump($response);
132             }
133              
134 5         14 my $query_url = 'https://tools.usps.com/go/ZipLookupResultsAction!input.action';
135 5   50     36 my $temp = POST $query_url, [
      50        
      50        
      100        
136             resultMode => '0',
137             companyName => '',
138             address1 => $addr->delivery_address // '',
139             address2 => '',
140             city => $addr->city // '',
141             state => $addr->state // '',
142             urbanCode => '',
143             postalCode => $addr->zip_code // '',
144             zip => ''
145             ];
146            
147 5         3680 $response = $ua->request($temp);
148            
149 5 50       430663 if ($self->verbose) {
150 0         0 $self->dump($response);
151             }
152              
153 5         37 my $content = $response->decoded_content;
154              
155             #
156             # Time to Parse:
157             #
158              
159 5         677 my @matches;
160              
161 5         82 my $tree = HTML::TreeBuilder::XPath->new();
162 5         1518 $tree->parse($content);
163 5         109 my @html_matches = $tree->findnodes('//div[@class="data"]');
164              
165 5         7115 my $xp = XML::XPathEngine->new();
166              
167 5         161 for my $node (@html_matches) {
168             # $node->dump();
169              
170 0         0 my $firm = undef;
171 0         0 my $address = undef;
172 0         0 my $city = undef;
173 0         0 my $state = undef;
174 0         0 my $zip4 = undef;
175 0         0 my $zip = undef;
176              
177 0         0 my $found;
178              
179 0         0 $found = $xp->find('p[@class="std-address"]/span[@class="address1 range"]', $node);
180 0         0 for my $x ($found->get_nodelist) {
181 0         0 $address = $x->as_trimmed_text();
182              
183             # my $firm_node = $xp->find('preceding-sibling::text()', $x);
184             # for my $y ($firm_node->get_nodelist) {
185             # $firm .= $y->as_trimmed_text();
186             # }
187              
188 0         0 last;
189             }
190              
191 0         0 $found = $xp->find('p[@class="std-address"]/span[@class="city range"]', $node);
192 0         0 for my $x ($found->get_nodelist) {
193 0         0 $city = $x->as_trimmed_text();
194 0         0 last;
195             }
196              
197 0         0 $found = $xp->find('p[@class="std-address"]/span[@class="state range"]', $node);
198 0         0 for my $x ($found->get_nodelist) {
199 0         0 $state = $x->as_trimmed_text();
200 0         0 last;
201             }
202              
203 0         0 $found = $xp->find('p[@class="std-address"]/span[@class="zip4"]', $node);
204 0         0 for my $x ($found->get_nodelist) {
205 0         0 $zip4 = $x->as_trimmed_text();
206 0         0 last;
207             }
208              
209 0         0 $found = $xp->find('p[@class="std-address"]/span[@class="zip"]', $node);
210 0         0 for my $x ($found->get_nodelist) {
211 0 0       0 $zip = $x->as_trimmed_text() . (defined($zip4) ? ('-' . $zip4) : '');
212 0         0 last;
213             }
214              
215 0         0 my %details;
216              
217 0         0 my $dts = $xp->find('div/dl[@class="details"]/dt', $node);
218 0         0 for my $dt ($dts->get_nodelist) {
219 0         0 my $key = $dt->as_trimmed_text();
220              
221 0         0 my $dds = $xp->find('following-sibling::dd[1]', $dt);
222              
223 0         0 for my $dd ($dds->get_nodelist) {
224 0         0 my $value = $dd->as_trimmed_text();
225 0         0 $details{$key} = $value;
226             }
227             }
228              
229 0         0 my $carrier_route = $details{'Carrier Route'};
230 0         0 my $county = $details{'County'};
231 0         0 my $delivery_point = $details{'Delivery Point Code'};
232 0         0 my $check_digit = $details{'Check Digit'};
233              
234 0         0 my $commercial_mail_receiving_agency = $details{'Commercial Mail Receiving Agency'};
235              
236 0         0 my $lac_indicator = $details{"LAC\x{2122}"};
237 0         0 my $elot_sequence = $details{"eLOT\x{2122}"};
238 0         0 my $elot_indicator = $details{'eLOT Ascending/Descending Indicator'};
239 0         0 my $record_type = $details{'Record Type Code'};
240 0         0 my $pmb_designator = $details{'PMB Designator'};
241 0         0 my $pmb_number = $details{'PMB Number'};
242 0         0 my $default_address = $details{'Default Flag'};
243 0         0 my $early_warning = $details{'EWS Flag'};
244 0         0 my $valid = $details{'DPV Confirmation Indicator'};
245              
246 0 0       0 if ($self->verbose) {
247 0         0 print("-" x 70, "\n");
248              
249 0 0       0 print "Firm: $firm\n" if defined $firm;
250              
251 0         0 print "Address: $address\n";
252 0         0 print "City: $city\n";
253 0         0 print "State: $state\n";
254 0         0 print "Zip: $zip\n";
255              
256 0 0       0 print "Carrier Route: $carrier_route\n" if defined $carrier_route;
257 0 0       0 print "County: $county\n" if defined $county;
258 0 0       0 print "Delivery Point: $delivery_point\n" if defined $delivery_point;
259 0 0       0 print "Check Digit: $check_digit\n" if defined $check_digit;
260 0 0       0 print "Commercial Mail Receiving Agency: $commercial_mail_receiving_agency\n" if defined $commercial_mail_receiving_agency;
261 0 0       0 print "LAC Indicator: $lac_indicator\n" if defined $lac_indicator;
262 0 0       0 print "eLOT Sequence: $elot_sequence\n" if defined $elot_sequence;
263 0 0       0 print "eLOT Indicator: $elot_indicator\n" if defined $elot_indicator;
264 0 0       0 print "Record Type: $record_type\n" if defined $record_type;
265 0 0       0 print "PMB Designator: $pmb_designator\n" if defined $pmb_designator;
266 0 0       0 print "PMB Number: $pmb_number\n" if defined $pmb_number;
267 0 0       0 print "Default Address: $default_address\n" if defined $default_address;
268 0 0       0 print "Early Warning: $early_warning\n" if defined $early_warning;
269 0 0       0 print "Valid: $valid\n" if defined $valid;
270              
271 0         0 print "\n";
272             }
273              
274 0         0 my $match = Scrape::USPS::ZipLookup::Address->new($address, $city, $state, $zip);
275              
276 0         0 $match->firm($firm);
277              
278 0         0 $match->carrier_route($carrier_route);
279 0         0 $match->county($county);
280 0         0 $match->delivery_point($delivery_point);
281 0         0 $match->check_digit($check_digit);
282 0         0 $match->commercial_mail_receiving_agency($commercial_mail_receiving_agency);
283 0         0 $match->lac_indicator($lac_indicator);
284 0         0 $match->elot_sequence($elot_sequence);
285 0         0 $match->elot_indicator($elot_indicator);
286 0         0 $match->record_type($record_type);
287 0         0 $match->pmb_designator($pmb_designator);
288 0         0 $match->pmb_number($pmb_number);
289 0         0 $match->default_address($default_address);
290 0         0 $match->early_warning($early_warning);
291 0         0 $match->valid($valid);
292              
293 0         0 push @matches, $match;
294             }
295              
296 5 50       17 print('\\', '_' x 77, '/', "\n") if $self->verbose;
297              
298 5         311 return @matches;
299             }
300              
301              
302             #
303             # std_addr()
304             #
305              
306             sub std_addr
307             {
308 5     5 0 904 my $self = shift;
309              
310 5         18 return $self->std_inner(@_);
311             }
312              
313              
314             #
315             # std_addrs()
316             #
317              
318             sub std_addrs
319             {
320 0     0 0   my $self = shift;
321              
322 0           my @result;
323              
324 0           foreach my $addr (@_) {
325 0           my @addr = $self->std_inner(@$addr);
326              
327 0           push @result, [ @addr ];
328             }
329              
330 0           return @result;
331             }
332              
333              
334             #
335             # trim()
336             #
337             # A purely internal utility subroutine.
338             #
339              
340             sub trim
341             {
342 0     0 0   my $string = shift;
343 0           $string =~ s/\x{a0}/ /sg; # Remove this odd character.
344 0           $string =~ s/^\s+//s; # Trim leading whitespace.
345 0           $string =~ s/\s+$//s; # Trim trailing whitespace.
346 0           $string =~ s/\s+/ /sg; # Coalesce interior whitespace.
347 0           return $string;
348             }
349              
350              
351             #
352             # Proper module termination:
353             #
354              
355             1;
356              
357             __END__