File Coverage

blib/lib/Data/Validate/CSV.pm

Criterion	Covered	Total	%
statement	35	35	100.0
branch			n/a
condition			n/a
subroutine	12	12	100.0
pod			n/a
total	47	47	100.0

line	stmt	sub	time	code
1	2	2	99863	use v5.12;
	2		18
2	2	2	10	use strict;
	2		4
	2		43
3	2	2	9	use warnings;
	2		4
	2		137
4
5				package Data::Validate::CSV;
6
7				our $AUTHORITY = 'cpan:TOBYINK';
8				our $VERSION = '0.003';
9
10	2	2	828	use Data::Validate::CSV::Cell;
	2		7
	2		74
11	2	2	1001	use Data::Validate::CSV::Column;
	2		9
	2		94
12	2	2	992	use Data::Validate::CSV::MultiValueCell;
	2		6
	2		67
13	2	2	878	use Data::Validate::CSV::Note;
	2		7
	2		68
14	2	2	905	use Data::Validate::CSV::Row;
	2		6
	2		76
15	2	2	880	use Data::Validate::CSV::Schema;
	2		7
	2		74
16	2	2	878	use Data::Validate::CSV::SingleValueCell;
	2		7
	2		67
17	2	2	890	use Data::Validate::CSV::Table;
	2		5
	2		70
18	2	2	23	use Data::Validate::CSV::Types;
	2		4
	2		12
19
20				1;
21
22				__END__
23
24				=pod
25
26				=encoding utf-8
27
28				=head1 NAME
29
30				Data::Validate::CSV - read and validate CSV
31
32				=head1 SYNOPSIS
33
34				CSV Schema (JSON):
35
36				{
37				"@context": "http://www.w3.org/ns/csvw",
38				"url": "countries.csv",
39				"tableSchema": {
40				"columns": [{
41				"name": "country",
42				"datatype": { "base": "string", "length": 2 }
43				},{
44				"name": "country group",
45				"datatype": "string"
46				},{
47				"name": "name (en)",
48				"datatype": "string"
49				},{
50				"name": "name (fr)",
51				"datatype": "string"
52				},{
53				"name": "name (de)",
54				"datatype": "string"
55				},{
56				"name": "latitude",
57				"datatype": { "base": "number", "maximum": 90, "minimum": -90 }
58				},{
59				"name": "longitude",
60				"datatype": { "base": "number", "maximum": 180, "minimum": -180 }
61				}]
62				}
63				}
64
65				CSV Data:
66
67				"at","eu","Austria","Autriche","Österreich","47.6965545","13.34598005"
68				"be","eu","Belgium","Belgique","Belgien","50.501045","4.47667405"
69				"bg","eu","Bulgaria","Bulgarie","Bulgarien","42.72567375","25.4823218"
70
71				Perl:
72
73				use Path::Tiny qw(path);
74				use Data::Validate::CSV;
75
76				my $table = Data::Validate::CSV::Table->new(
77				schema => path('countries.csv-metadata.json'),
78				input => path('countries.csv'),
79				has_header => !!0,
80				);
81
82				while (my $row = $table->get_row) {
83				for my $e (@{$row->errors}) {
84				warn $e;
85				}
86				printf(
87				"%s is at latitude %f, longitude %f.\n",
88				$row->get("name (en)")->value,
89				$row->get("latitude")->value,
90				$row->get("longitude")->value,
91				);
92				}
93
94				=head1 DESCRIPTION
95
96				There's not really a lot of documentation right now.
97
98				Mostly there are three interfaces you need to know about: tables, rows,
99				and cells. (There are also columns, schemas, and notes, but for most
100				day-to-day usage, those can be considered internal implementation
101				details.)
102
103				=head2 Table interface
104
105				The table is constructed with the following attributes:
106
107				=over
108
109				=item C<< schema >>
110
111				A schema for the table. Can be a hashref, a JSON string, a scalar ref to
112				a JSON string, or a L<Path::Tiny> path to a file containing the schema.
113
114				=item C<< input >>
115
116				The CSV data for the table. Can be a filehandle, a scalar ref to a string
117				of data, or a L<Path::Tiny> path to a file.
118
119				=item C<< has_header >>
120
121				A boolean indicating whether the CSV contains a header row. If it does,
122				the header row is used to supply any column names missing from the
123				schema, and it is skipped rather than being returned by C<get_row>.
124
125				=item C<< reader >>
126
127				A coderef which, if given a filehandle, will return a parsed line of CSV.
128				The default is basically something like:
129
130				sub { Text::CSV_XS->new->getline($_[0]) }
131
132				That's probably sufficient for most cases, but you may need to supply your
133				own reader for handling tab-delimited files.
134
135				=item C<< skip_rows >>
136
137				An integer, number of additional rows to skip I<before> the header.
138				Some CSV files contain a title or credit line. Defaults to 0.
139
140				=item C<< skip_rows_after_header >>
141
142				An integer, number of additional rows to skip I<after> the header.
143				Defaults to 0.
144
145				=back
146
147				The table provides the following methods:
148
149				=over
150
151				=item C<< get_row >>
152
153				Returns a row object for the next row of the table.
154
155				=item C<< all_rows >>
156
157				Gets all the rows as a list.
158
159				=item C<< row_count >>
160
161				The number of non-skipped, non-header lines read so far.
162
163				=back
164
165				=head2 Row interface
166
167				The rows returned by C<get_row> and C<all_rows> are blessed objects.
168				They provide the following methods:
169
170				=over
171
172				=item C<< raw_values >>
173
174				The values returned by L<Text::CSV_XS> without any further processing.
175
176				=item C<< values >>
177
178				The values returned by L<Text::CSV_XS>, processed by datatype. Date and
179				time datatypes will be reformatted from any CLDR-based format to ISO 8601.
180				Booleans using non-standard representations will be changed to "1" and "0".
181				Fields that have a separator defined will be split into an arrayref.
182				Numbers given as percentages will be divided by 100. And so forth.
183
184				=item C<< cells >>
185
186				Returns the same values as C<values> but wrapped in cell objects. The
187				following are equivalent:
188
189				$row->values->[0];
190				$row->cells->[0]->value;
191				$row->[0]; # $row overloads @{}
192
193				Why fetch a cell instead of directly fetching the value? The cell object
194				offers a few other useful methods.
195
196				=item C<< get($name) >>
197
198				Gets a single cell from the row by its name. Names are defined in the
199				schema, or the header row if missing from the schema.
200
201				$row->get("country")->value;
202
203				=item C<< row_number >>
204
205				The row number for this row in the table. Rows are numbered starting at
206				1. Headers and skipped rows are not counted.
207
208				=item C<< key_string >>
209
210				For tables that have a primary key, this returns a string formed by joining
211				together the primary key columns. It ought to be a unique identifier for this
212				row within the table, and if it is not, this will be raised as an error.
213
214				=item C<< errors >>
215
216				An arrayref of strings of errors associated with this row. This includes
217				data validation problems.
218
219				=back
220
221				=head2 Cell interface
222
223				It is possible to bypass using the cell interface and access cell values
224				directly from the rows, but if accessing cells, these are the methods they
225				provide:
226
227				=over
228
229				=item C<< raw_value >>
230
231				The value returned by L<Text::CSV_XS> without any further processing.
232
233				=item C<< value >>
234
235				The value returned by L<Text::CSV_XS>, processed by datatype.
236
237				=item C<< inflated_value >>
238
239				Like C<value> but inflates some values to blessed objects. Date and time
240				related datatypes will be returned as L<DateTime>, L<DateTime::Incomplete>,
241				or L<DateTime::Duration> objects. Booleans will be returned as
242				L<JSON::PP::Boolean> objects.
243
244				=item C<< row_number >>
245
246				The row number for the cell's parent row in the table. Rows are numbered
247				starting at 1. Headers and skipped rows are not counted.
248
249				=item C<< col_number >>
250
251				The column number of this cell within the parent row. Columns are numbered
252				starting at 1.
253
254				=item C<< datatype >>
255
256				The datatype for this cell as a hashref.
257
258				=back
259
260				=head1 BUGS
261
262				Please report any bugs to
263				L<http://rt.cpan.org/Dist/Display.html?Queue=Data-Validate-CSV>.
264
265				=head1 SEE ALSO
266
267				L<https://www.w3.org/TR/2016/NOTE-tabular-data-primer-20160225/>.
268
269				=head1 AUTHOR
270
271				Toby Inkster E<lt>tobyink@cpan.orgE<gt>.
272
273				=head1 COPYRIGHT AND LICENCE
274
275				This software is copyright (c) 2019 by Toby Inkster.
276
277				This is free software; you can redistribute it and/or modify it under
278				the same terms as the Perl 5 programming language system itself.
279
280				=head1 DISCLAIMER OF WARRANTIES
281
282				THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
283				WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
284				MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
285