File Coverage

blib/lib/Geo/UK/Postcode/CodePointOpen.pm

Criterion	Covered	Total	%
statement	94	100	94.0
branch	27	36	75.0
condition	13	20	65.0
subroutine	16	17	94.1
pod	5	5	100.0
total	155	178	87.0

line	stmt	bran	cond	sub	pod	time	code
1							package Geo::UK::Postcode::CodePointOpen;
2
3							our $VERSION = '0.007';
4
5	6			6		642377	use Moo;
	6					73536
	6					54
6	6			6		11760	use Types::Path::Tiny qw/ Dir /;
	6					643590
	6					72
7
8	6			6		6085	use Geo::UK::Postcode::Regex;
	6					21989
	6					321
9	6			6		2245	use Geo::Coordinates::OSGB qw/ grid_to_ll /;
	6					817962
	6					1046
10	6			6		3409	use List::MoreUtils qw/ uniq /;
	6					59084
	6					77
11	6			6		10838	use Text::CSV;
	6					121456
	6					7816
12
13							has path => ( is => 'ro', isa => Dir, coerce => Dir->coercion );
14							has pc_re => ( is => 'lazy' );
15							has column_headers => ( is => 'lazy' );
16							has csv => ( is => 'lazy' );
17							has metadata => ( is => 'lazy' );
18
19							sub _build_pc_re {
20	0			0		0	Geo::UK::Postcode::Regex->strict_regex;
21							}
22
23							sub _build_column_headers {
24	3			3		3857	my $self = shift;
25
26	3					16	my $fh = $self->doc_dir->child('Code-Point_Open_Column_Headers.csv')
27							->filehandle('<');
28
29	3	50				751	my $short = $self->csv->getline($fh)
30							or die "Unable to read short column headers";
31	3	50				245	my $long = $self->csv->getline($fh)
32							or die "Unable to read long column headers";
33
34							return {
35	3					256	short => $short,
36							long => $long
37							};
38							}
39
40							sub _build_csv {
41	3	50		3		74	my $csv = Text::CSV->new( { binary => 1 } )
42							or die Text::CSV->error_diag();
43	3					686	return $csv;
44							}
45
46							#
47							# PRODUCT:
48							# DATASET VERSION NUMBER:
49							# COPYRIGHT DATE:
50							# RM UPDATE DATE:
51							# XX\t123
52							sub _build_metadata {
53	1			1		3340	my $self = shift;
54
55	1					4	my $metadata_file = $self->doc_dir->child('metadata.txt');
56
57	1					64	my @lines = $metadata_file->lines( { chomp => 1 } );
58
59	1					205	my $author = shift @lines;
60
61	1					3	my @headers = grep {/:/} @lines;
	6					14
62	1					3	my @counts = grep {/\t/} @lines;
	6					11
63
64							return {
65							AUTHOR => $author,
66	4					27	( map { split /\s:\s/ } @headers ),
67							counts =>
68	1	50				4	{ map { /\s+([A-Z]{1,2})\t(\d+)/ ? ( $1, $2 ) : () } @counts },
	2					22
69							};
70							}
71
72							sub doc_dir {
73	4			4	1	61	shift->path->child('Doc');
74							}
75
76							sub data_dir {
77	8			8	1	47	shift->path->child('Data/CSV');
78							}
79
80							sub data_files {
81	8			8	1	5360	my ( $self, @outcodes ) = @_;
82
83							my $areas
84	8	50				54	= join( '\|', uniq grep {$_} map { /^([A-Z]+)/i && lc $1 } @outcodes );
	5					28
	5					43
85
86	8	100				32	return sort $self->data_dir->children(
87							$areas ? qr/^(?:$areas)\.csv$/ #
88							: qr/\.csv$/
89							);
90							}
91
92							sub read_iterator {
93	5			5	1	12011	my ( $self, %args ) = @_;
94
95	5					16	my ( @col_names, $lat_col, $lon_col, $out_col, $in_col );
96	5	50				21	if ( $args{short_column_names} ) {
97	0					0	@col_names = @{ $self->column_headers->{short} };
	0					0
98	0					0	( $lat_col, $lon_col ) = ( 'LA', 'LO' );
99	0					0	( $out_col, $in_col ) = ( 'OC', 'IC' );
100							} else {
101	5					11	@col_names = @{ $self->column_headers->{long} };
	5					138
102	5					46	( $lat_col, $lon_col ) = ( 'Latitude', 'Longitude' );
103	5					16	( $out_col, $in_col ) = ( 'Outcode', 'Incode' );
104							}
105
106	5	100				11	my @outcodes = @{ $args{outcodes} \|\| [] };
	5					32
107	5					20	my @data_files = $self->data_files(@outcodes);
108
109	5	100				1130	my $match = @outcodes ? join( '\|', map {uc} @outcodes ) : undef;
	3					11
110	5	100				50	$match = qr/^(?:$match)$/ if $match;
111
112							# Create iterator coderef
113	5					13	my $fh2;
114	5					128	my $csv = $self->csv;
115
116							my $iterator = sub {
117
118	95			95		360	my %pc;
119	95					163	while (1) {
120
121	95	100	100			447	unless ( $fh2 && !eof $fh2 ) {
122	14	100				87	my $file = shift @data_files or return; # none left
123	9					43	$fh2 = $file->filehandle('<');
124							}
125
126							# Expects:
127							# Postcode,Positional_quality_indicator,Eastings,Northings,...
128	90					3546	my $row = $csv->getline($fh2);
129
130	90					3081	my $i = 0;
131	90					218	%pc = map { $_ => $row->[ $i++ ] } @col_names;
	900					2379
132
133	90	50	66			364	if ( $args{include_lat_long} && $pc{Eastings} && $pc{Northings} ) {
			33
134	20					59	my ( $lat, $lon ) = grid_to_ll( $pc{Eastings}, $pc{Northings} );
135
136	20					4839	$pc{$lat_col} = sprintf( "%.5f", $lat );
137	20					78	$pc{$lon_col} = sprintf( "%.5f", $lon );
138							}
139
140	90	100	100			343	if ( $args{split_postcode} \|\| $match ) {
141
142	30					94	$pc{Postcode} =~ s/\s+/ /;
143
144							my ( $area, $district, $sector, $unit )
145	30					60	= eval { $pc{Postcode} =~ $self->pc_re };
	30					593
146
147	30	50	33			489	if ( $@ \|\| !$unit ) {
148							die "Unable to parse '"
149							. $pc{Postcode}
150	0					0	. "' : Please report via "
151							. "https://github.com/mjemmeson/Geo-UK-Postcode-Regex/issues\n";
152
153							} else {
154
155	30	50	66			122	next if $match && ( $area . $district ) !~ $match;
156
157	30	100				80	if ( $args{split_postcode} ) {
158	20					50	$pc{$out_col} = $area . $district;
159	20					48	$pc{$in_col} = $sector . $unit;
160							}
161							}
162							}
163
164	90					260	last;
165							}
166
167	90					257	return \%pc;
168	5					85	};
169
170	5					32	return $iterator;
171							}
172
173							sub batch_iterator {
174	1			1	1	4751	my ( $self, %args ) = @_;
175
176	1		50			6	my $batch_size = $args{batch_size} \|\| 100;
177
178	1					7	my $read_iterator = $self->read_iterator(%args);
179
180							return sub {
181
182	6			6		32	my $i = 1;
183	6					14	my @postcodes;
184
185	6					15	while ( my $pc = $read_iterator->() ) {
186	20					51	push @postcodes, $pc;
187	20	100				69	last if ++$i > $batch_size;
188							}
189
190	6					24	return @postcodes;
191	1					11	};
192							}
193
194							1;
195
196							__END__