File Coverage

blib/lib/Spreadsheet/Read/Ingester.pm
Criterion Covered Total %
statement 61 61 100.0
branch 15 20 75.0
condition n/a
subroutine 9 9 100.0
pod 2 2 100.0
total 87 92 94.5


line stmt bran cond sub pod time code
1             package Spreadsheet::Read::Ingester ;
2             $Spreadsheet::Read::Ingester::VERSION = '0.009';
3 2     2   255578 use strict;
  2         13  
  2         59  
4 2     2   11 use warnings;
  2         4  
  2         65  
5              
6 2     2   686 use Storable;
  2         3419  
  2         99  
7 2     2   13 use File::Spec;
  2         4  
  2         46  
8 2     2   542 use File::Signature;
  2         4071  
  2         60  
9 2     2   938 use File::UserConfig;
  2         75535  
  2         95  
10 2     2   1337 use Spreadsheet::Read 0.68;
  2         972425  
  2         989  
11              
12             ### Public methods ###
13              
14             sub new {
15 2     2 1 4835 my $s = shift;
16 2         5 my $file = shift;
17 2         7 my @args = @_;
18              
19 2         5 my $sig = '';
20 2         3 eval { $sig = File::Signature->new($file)->{digest} };
  2         11  
21              
22 2         297 my %args = @args;
23 2         4 my $suffix;
24 2         13 foreach my $key (sort keys %args) {
25 4         10 $suffix .= $key;
26 4         8 $suffix .= $args{$key};
27             }
28 2 50       8 if ($suffix) {
29 2         6 $sig .= "-$suffix";
30             }
31 2         12 my $configdir = File::UserConfig->new(dist => 'Spreadsheet-Read-Ingester')->configdir;
32 2         998 my $parsed_file = File::Spec->catfile($configdir, $sig);
33              
34 2         6 my $data;
35              
36             # try to retrieve parsed data
37 2         3 eval { $data = retrieve $parsed_file };
  2         9  
38              
39             # otherwise reingest from raw file
40 2 100       746 if (!$data) {
41 1         7 my $data = Spreadsheet::Read->new($file, @_);
42 1         1285 my $error = $data->[0]{error};
43 1 50       6 die "Unable to read data from file: $file. Error: $error" if $data->[0]{error};
44 1         5 store $data, $parsed_file;
45             }
46              
47 2         272 return $data;
48             }
49              
50             sub cleanup {
51 3     3 1 1002834 my $s = shift;
52 3         9 my $age = shift;
53              
54 3 100       28 if (!defined $age) {
    100          
    50          
55 1         3 $age = 30;
56             } elsif ($age eq '0') {
57 1         3 $age = -1
58             } elsif ($age !~ /^\d+$/) {
59 1         24 warn 'cleanup method accepts only positive integer values or 0';
60 1         91 return;
61             }
62              
63 2         15 my $configdir = File::UserConfig->new(dist => 'Spreadsheet-Read-Ingester')->configdir;
64              
65 2 50       1082 opendir (DIR, $configdir) or die 'Could not open directory.';
66 2         55 my @files = readdir (DIR);
67 2         28 closedir (DIR);
68 2         9 foreach my $file (@files) {
69 10         138 $file = File::Spec->catfile($configdir, $file);
70 10 100       132 next if (-d $file);
71 4 100       59 if (-M $file >= $age) {
72 2 50       150 unlink $file or die 'Cannot remove file: $file';
73             }
74             }
75             }
76              
77             1; # Magic true value
78             # ABSTRACT: ingest and save csv and spreadsheet data to a perl data structure to avoid reparsing
79              
80             __END__
81              
82             =pod
83              
84             =head1 NAME
85              
86             Spreadsheet::Read::Ingester - ingest and save csv and spreadsheet data to a perl data structure to avoid reparsing
87              
88             =head1 SYNOPSIS
89              
90             use Spreadsheet::Read::Ingester;
91              
92             # ingest raw file, store parsed data file, and return data object
93             my $data = Spreadsheet::Read::Ingester->new('/path/to/file');
94              
95             # the returned data object has all the methods of a L<Spreadsheet::Read> object
96             my $num_cols = $data->sheet(1)->maxcol;
97              
98             # delete old data files older than 30 days to save disk space
99             Spreadsheet::Read::Ingester->cleanup;
100              
101             =head1 DESCRIPTION
102              
103             This module is intended to be a drop-in replacement for L<Spreadsheet::Read> and
104             is a simple, unobtrusive wrapper for it.
105              
106             Parsing spreadsheet and csv data files is time consuming, especially with large
107             data sets. If a data file is ingested more than once, much time and processing
108             power is wasted reparsing the same data. To avoid reparsing, this module uses
109             L<Storable> to save a parsed version of the data to disk when a new file is
110             ingested. All subsequent ingestions are retrieved from the stored Perl data
111             structure. Files are saved in the directory determined by L<File::UserConfig>
112             and is a function of the user's OS.
113              
114             The stored data file names are the unique file signatures for the raw data file.
115             The signature is used to detect if the original file changed, in which case the
116             data is reingested from the raw file and a new parsed file is saved using an
117             updated file signature. Arguments passed to the constructor are appended to the
118             name of the file to ensure different parse options are accounted for. Parsed
119             data files are kept indefinitely but can be deleted with the C<cleanup()>
120             method.
121              
122             Consult the L<Spreadsheet::Read> documentation for accessing the data object
123             returned by this module.
124              
125             =head1 METHODS
126              
127             =head2 new( $path_to_file )
128              
129             my $data = Spreadsheet::Read::Ingester->new('/path/to/file');
130              
131             Takes same arguments as the new constructor in L<Spreadsheet::Read> module.
132             Returns an object identical to the object returned by the L<Spreadsheet::Read>
133             module along with its corresponding methods.
134              
135             =head2 cleanup( $file_age_in_days )
136              
137             =head2 cleanup()
138              
139             Spreadsheet::Read::Ingester->cleanup(0);
140              
141             Deletes all stored files from the user's application data directory. Takes an
142             optional argument indicating the minimum number of days old the file must be
143             before it is deleted. Defaults to 30 days. Passing a value of 0 deletes all
144             files.
145              
146             =head1 REQUIRES
147              
148             =over 4
149              
150             =item * L<File::Signature|File::Signature>
151              
152             =item * L<File::Spec|File::Spec>
153              
154             =item * L<File::UserConfig|File::UserConfig>
155              
156             =item * L<Spreadsheet::Read|Spreadsheet::Read>
157              
158             =item * L<Storable|Storable>
159              
160             =item * L<strict|strict>
161              
162             =item * L<warnings|warnings>
163              
164             =back
165              
166             =for :stopwords cpan testmatrix url annocpan anno bugtracker rt cpants kwalitee diff irc mailto metadata placeholders metacpan
167              
168             =head1 SUPPORT
169              
170             =head2 Perldoc
171              
172             You can find documentation for this module with the perldoc command.
173              
174             perldoc Spreadsheet::Read::Ingester
175              
176             =head2 Websites
177              
178             The following websites have more information about this module, and may be of help to you. As always,
179             in addition to those websites please use your favorite search engine to discover more resources.
180              
181             =over 4
182              
183             =item *
184              
185             MetaCPAN
186              
187             A modern, open-source CPAN search engine, useful to view POD in HTML format.
188              
189             L<https://metacpan.org/release/Spreadsheet-Read-Ingester>
190              
191             =back
192              
193             =head2 Source Code
194              
195             The code is open to the world, and available for you to hack on. Please feel free to browse it and play
196             with it, or whatever. If you want to contribute patches, please send me a diff or prod me to pull
197             from your repository :)
198              
199             L<https://github.com/sdondley/Spreadsheet-Read-Ingester>
200              
201             git clone git://github.com/sdondley/Spreadsheet-Read-Ingester.git
202              
203             =head1 BUGS AND LIMITATIONS
204              
205             You can make new bug reports, and view existing ones, through the
206             web interface at L<https://github.com/sdondley/Spreadsheet-Read-Ingester/issues>.
207              
208             =head1 INSTALLATION
209              
210             See perlmodinstall for information and options on installing Perl modules.
211              
212             =head1 SEE ALSO
213              
214             L<Spreadsheet::Read>
215              
216             =head1 AUTHOR
217              
218             Steve Dondley <s@dondley.com>
219              
220             =head1 COPYRIGHT AND LICENSE
221              
222             This software is copyright (c) 2019 by Steve Dondley.
223              
224             This is free software; you can redistribute it and/or modify it under
225             the same terms as the Perl 5 programming language system itself.
226              
227             =cut