File Coverage

blib/lib/Data/Validate/CSV.pm
Criterion Covered Total %
statement 35 35 100.0
branch n/a
condition n/a
subroutine 12 12 100.0
pod n/a
total 47 47 100.0


line stmt bran cond sub pod time code
1 2     2   99863 use v5.12;
  2         18  
2 2     2   10 use strict;
  2         4  
  2         43  
3 2     2   9 use warnings;
  2         4  
  2         137  
4              
5             package Data::Validate::CSV;
6              
7             our $AUTHORITY = 'cpan:TOBYINK';
8             our $VERSION = '0.003';
9              
10 2     2   828 use Data::Validate::CSV::Cell;
  2         7  
  2         74  
11 2     2   1001 use Data::Validate::CSV::Column;
  2         9  
  2         94  
12 2     2   992 use Data::Validate::CSV::MultiValueCell;
  2         6  
  2         67  
13 2     2   878 use Data::Validate::CSV::Note;
  2         7  
  2         68  
14 2     2   905 use Data::Validate::CSV::Row;
  2         6  
  2         76  
15 2     2   880 use Data::Validate::CSV::Schema;
  2         7  
  2         74  
16 2     2   878 use Data::Validate::CSV::SingleValueCell;
  2         7  
  2         67  
17 2     2   890 use Data::Validate::CSV::Table;
  2         5  
  2         70  
18 2     2   23 use Data::Validate::CSV::Types;
  2         4  
  2         12  
19              
20             1;
21              
22             __END__
23              
24             =pod
25              
26             =encoding utf-8
27              
28             =head1 NAME
29              
30             Data::Validate::CSV - read and validate CSV
31              
32             =head1 SYNOPSIS
33              
34             CSV Schema (JSON):
35              
36             {
37             "@context": "http://www.w3.org/ns/csvw",
38             "url": "countries.csv",
39             "tableSchema": {
40             "columns": [{
41             "name": "country",
42             "datatype": { "base": "string", "length": 2 }
43             },{
44             "name": "country group",
45             "datatype": "string"
46             },{
47             "name": "name (en)",
48             "datatype": "string"
49             },{
50             "name": "name (fr)",
51             "datatype": "string"
52             },{
53             "name": "name (de)",
54             "datatype": "string"
55             },{
56             "name": "latitude",
57             "datatype": { "base": "number", "maximum": 90, "minimum": -90 }
58             },{
59             "name": "longitude",
60             "datatype": { "base": "number", "maximum": 180, "minimum": -180 }
61             }]
62             }
63             }
64              
65             CSV Data:
66              
67             "at","eu","Austria","Autriche","Österreich","47.6965545","13.34598005"
68             "be","eu","Belgium","Belgique","Belgien","50.501045","4.47667405"
69             "bg","eu","Bulgaria","Bulgarie","Bulgarien","42.72567375","25.4823218"
70              
71             Perl:
72              
73             use Path::Tiny qw(path);
74             use Data::Validate::CSV;
75            
76             my $table = Data::Validate::CSV::Table->new(
77             schema => path('countries.csv-metadata.json'),
78             input => path('countries.csv'),
79             has_header => !!0,
80             );
81            
82             while (my $row = $table->get_row) {
83             for my $e (@{$row->errors}) {
84             warn $e;
85             }
86             printf(
87             "%s is at latitude %f, longitude %f.\n",
88             $row->get("name (en)")->value,
89             $row->get("latitude")->value,
90             $row->get("longitude")->value,
91             );
92             }
93              
94             =head1 DESCRIPTION
95              
96             There's not really a lot of documentation right now.
97              
98             Mostly there are three interfaces you need to know about: tables, rows,
99             and cells. (There are also columns, schemas, and notes, but for most
100             day-to-day usage, those can be considered internal implementation
101             details.)
102              
103             =head2 Table interface
104              
105             The table is constructed with the following attributes:
106              
107             =over
108              
109             =item C<< schema >>
110              
111             A schema for the table. Can be a hashref, a JSON string, a scalar ref to
112             a JSON string, or a L<Path::Tiny> path to a file containing the schema.
113              
114             =item C<< input >>
115              
116             The CSV data for the table. Can be a filehandle, a scalar ref to a string
117             of data, or a L<Path::Tiny> path to a file.
118              
119             =item C<< has_header >>
120              
121             A boolean indicating whether the CSV contains a header row. The header
122             row is used to supply any column names missing from the schema, and is
123             skipped rather than being returned by C<get_row>.
124              
125             =item C<< reader >>
126              
127             A coderef which, if given a filehandle, will return a parsed line of CSV.
128             The default is basically something like:
129              
130             sub { Text::CSV_XS->new->getline($_[0]) }
131              
132             That's probably sufficient for most cases, but you may need to supply your
133             own reader for handling tab-delimited files.
134              
135             =item C<< skip_rows >>
136              
137             An integer, number of additional rows to skip I<before> the header.
138             Some CSV files contain a title or credit line. Defaults to 0.
139              
140             =item C<< skip_rows_after_header >>
141              
142             An integer, number of additional rows to skip I<after> the header.
143             Defaults to 0.
144              
145             =back
146              
147             The table provides the following methods:
148              
149             =over
150              
151             =item C<< get_row >>
152              
153             Returns a row object for the next row of the table.
154              
155             =item C<< all_rows >>
156              
157             Gets all the rows as a list.
158              
159             =item C<< row_count >>
160              
161             The number of non-skipped, non-header lines read so far.
162              
163             =back
164              
165             =head2 Row interface
166              
167             The rows returned by C<get_row> and C<all_rows> are blessed objects.
168             They provide the following methods:
169              
170             =over
171              
172             =item C<< raw_values >>
173              
174             The values returned by L<Text::CSV_XS> without any further processing.
175              
176             =item C<< values >>
177              
178             The values returned by L<Text::CSV_XS>, processed by datatype. Date and
179             time datatypes will be reformatted from any CLDR-based format to ISO 8601.
180             Booleans using non-standard representations will be changed to "1" and "0".
181             Fields that have a separator defined will be split into an arrayref.
182             Numbers given as percentages will be divided by 100. And so forth.
183              
184             =item C<< cells >>
185              
186             Returns the same values as C<values> but wrapped in cell objects. The
187             following are equivalent:
188              
189             $row->values->[0];
190             $row->cells->[0]->value;
191             $row->[0]; # $row overloads @{}
192              
193             Why fetch a cell instead of directly fetching the value? The cell object
194             offers a few other useful methods.
195              
196             =item C<< get($name) >>
197              
198             Gets a single cell from the row by its name. Names are defined in the
199             schema, or the header row if missing from the schema.
200              
201             $row->get("country")->value;
202              
203             =item C<< row_number >>
204              
205             The row number for this row in the table. Rows are numbered starting at
206             1. Headers and skipped rows are not counted.
207              
208             =item C<< key_string >>
209              
210             For tables that have a primary key, this returns a string formed by joining
211             together the primary key columns. It ought to be a unique identifier for this
212             row within the table, and if it is not, this will be raised as an error.
213              
214             =item C<< errors >>
215              
216             An arrayref of strings of errors associated with this row. This includes
217             data validation problems.
218              
219             =back
220              
221             =head2 Cell interface
222              
223             It is possible to bypass using the cell interface and access cell values
224             directly from the rows, but if accessing cells, these are the methods they
225             provide:
226              
227             =over
228              
229             =item C<< raw_value >>
230              
231             The value returned by L<Text::CSV_XS> without any further processing.
232              
233             =item C<< value >>
234              
235             The value returned by L<Text::CSV_XS>, processed by datatype.
236              
237             =item C<< inflated_value >>
238              
239             Like C<value> but inflates some values to blessed objects. Date and time
240             related datatypes will be returned as L<DateTime>, L<DateTime::Incomplete>,
241             or L<DateTime::Duration> objects. Booleans will be returned as
242             L<JSON::PP::Boolean> objects.
243              
244             =item C<< row_number >>
245              
246             The row number for the cell's parent row in the table. Rows are numbered
247             starting at 1. Headers and skipped rows are not counted.
248              
249             =item C<< col_number >>
250              
251             The column number of this cell within the parent row. Columns are numbered
252             starting at 1.
253              
254             =item C<< datatype >>
255              
256             The datatype for this cell as a hashref.
257              
258             =back
259              
260             =head1 BUGS
261              
262             Please report any bugs to
263             L<http://rt.cpan.org/Dist/Display.html?Queue=Data-Validate-CSV>.
264              
265             =head1 SEE ALSO
266              
267             L<https://www.w3.org/TR/2016/NOTE-tabular-data-primer-20160225/>.
268              
269             =head1 AUTHOR
270              
271             Toby Inkster E<lt>tobyink@cpan.orgE<gt>.
272              
273             =head1 COPYRIGHT AND LICENCE
274              
275             This software is copyright (c) 2019 by Toby Inkster.
276              
277             This is free software; you can redistribute it and/or modify it under
278             the same terms as the Perl 5 programming language system itself.
279              
280             =head1 DISCLAIMER OF WARRANTIES
281              
282             THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
283             WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
284             MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
285