File Coverage

blib/lib/Spreadsheet/Read/Ingester.pm
Criterion Covered Total %
statement 54 54 100.0
branch 14 18 77.7
condition n/a
subroutine 9 9 100.0
pod 2 2 100.0
total 79 83 95.1


line stmt bran cond sub pod time code
1             package Spreadsheet::Read::Ingester ;
2             $Spreadsheet::Read::Ingester::VERSION = '0.008';
3 2     2   204706 use strict;
  2         11  
  2         50  
4 2     2   10 use warnings;
  2         4  
  2         50  
5              
6 2     2   582 use Storable;
  2         2617  
  2         93  
7 2     2   13 use File::Spec;
  2         3  
  2         47  
8 2     2   413 use File::Signature;
  2         3184  
  2         50  
9 2     2   775 use File::UserConfig;
  2         64313  
  2         86  
10 2     2   1136 use Spreadsheet::Read 0.68;
  2         793426  
  2         724  
11              
12             ### Public methods ###
13              
# new( $path_to_file, @spreadsheet_read_args )
#
# Returns the parsed data for $file. A previously stored copy is loaded with
# Storable's retrieve() when one exists under the file's current signature;
# otherwise the file is parsed with Spreadsheet::Read->new and the result is
# stored for next time.
#
# Parameters:
#   $file - path of the raw data file
#   @args - extra arguments passed through unchanged to Spreadsheet::Read->new
#
# Returns: the retrieved or freshly parsed data object.
# Dies:    when the raw file cannot be parsed.
sub new {
    my $s    = shift;
    my $file = shift;
    my @args = @_;

    # Digest of the raw file. It stays empty when the signature cannot be
    # computed, in which case the on-disk cache is bypassed entirely.
    my $sig = eval { File::Signature->new($file)->{digest} };
    $sig = '' if !defined $sig;

    my $configdir = File::UserConfig->new(dist => 'Spreadsheet-Read-Ingester')->configdir;
    my $parsed_file = File::Spec->catfile($configdir, $sig);

    my $data;

    # try to retrieve parsed data
    if (length $sig) {
        eval { $data = retrieve($parsed_file) };
    }

    # otherwise reingest from raw file
    if (!$data) {
        # Assign to the outer $data: redeclaring it with "my" here would make
        # the sub return undef for every newly ingested file.
        $data = Spreadsheet::Read->new($file, @args);

        # NOTE(review): guards against the constructor returning a
        # non-reference on failure -- confirm against Spreadsheet::Read.
        die "Unable to read data from file: $file. Error: no data returned"
            if !ref $data;

        my $error = $data->[0]{error};
        die "Unable to read data from file: $file. Error: $error" if $error;

        # Without a signature there is no usable cache file name.
        store($data, $parsed_file) if length $sig;
    }

    return $data;
}
40              
# cleanup( $file_age_in_days )
#
# Removes stored parsed-data files from the module's configuration directory.
#
# Parameters:
#   $age - optional minimum age in days a file must have to be removed.
#          Defaults to 30. A value of 0 removes every stored file.
#
# Returns: nothing. Emits a warning and returns early, removing nothing, when
#          $age is not a non-negative integer.
# Dies:    when the directory cannot be opened or a file cannot be removed.
sub cleanup {
    my $s   = shift;
    my $age = shift;

    if (!defined $age) {
        $age = 30;
    } elsif ($age !~ /^\d+$/) {
        warn 'cleanup method accepts only positive integer values or 0';
        return;
    }

    # -M measures age relative to the start of the script, so a file written
    # by a long-running process can report a negative age. Zero is therefore
    # an explicit "remove everything" switch rather than an age comparison.
    my $delete_all = ($age == 0);

    my $configdir = File::UserConfig->new(dist => 'Spreadsheet-Read-Ingester')->configdir;

    opendir(my $dh, $configdir)
        or die "Could not open directory $configdir: $!";
    my @names = readdir $dh;
    closedir $dh;

    foreach my $name (@names) {
        my $path = File::Spec->catfile($configdir, $name);

        # Skips "." and ".." as well as any real subdirectory.
        next if -d $path;
        next unless $delete_all || -M $path >= $age;

        # Double quotes so the failing path actually appears in the message.
        unlink $path or die "Cannot remove file: $path: $!";
    }

    return;
}
67              
68             1; # Magic true value
69             # ABSTRACT: ingest and save csv and spreadsheet data to a perl data structure to avoid reparsing
70              
71             __END__
72              
73             =pod
74              
75             =head1 NAME
76              
77             Spreadsheet::Read::Ingester - ingest and save csv and spreadsheet data to a perl data structure to avoid reparsing
78              
79             =head1 SYNOPSIS
80              
81             use Spreadsheet::Read::Ingester;
82              
83             # ingest raw file, store parsed data file, and return data object
84             my $data = Spreadsheet::Read::Ingester->new('/path/to/file');
85              
86             # the returned data object has all the methods of a L<Spreadsheet::Read> object
87             my $num_cols = $data->sheet(1)->maxcol;
88              
89             # delete stored data files older than 30 days to save disk space
90             Spreadsheet::Read::Ingester->cleanup;
91              
92             =head1 DESCRIPTION
93              
94             This module is intended to be a drop-in replacement for L<Spreadsheet::Read> and
95             is a simple, unobtrusive wrapper for it.
96              
97             Parsing spreadsheet and csv data files is time consuming, especially with large
98             data sets. If a data file is ingested more than once, much time and processing
99             power is wasted reparsing the same data. To avoid reparsing, this module uses
100             L<Storable> to save a parsed version of the data to disk when a new file is
101             ingested. All subsequent ingestions are retrieved from the stored Perl data
102             structure. Files are saved in the directory determined by L<File::UserConfig>,
103             the location of which depends on the user's OS.
104              
105             The stored data file names are the unique file signatures for the raw data file.
106             The signature is used to detect if the original file changed, in which case the
107             data is reingested from the raw file and a new parsed file is saved using an
108             updated file signature. Parsed data files are kept indefinitely but can be
109             deleted with the C<cleanup()> method.
110              
111             Consult the L<Spreadsheet::Read> documentation for accessing the data object
112             returned by this module.
113              
114             =head1 METHODS
115              
116             =head2 new( $path_to_file )
117              
118             my $data = Spreadsheet::Read::Ingester->new('/path/to/file');
119              
120             Takes the same arguments as the C<new> constructor in the L<Spreadsheet::Read> module.
121             Returns an object identical to the object returned by the L<Spreadsheet::Read>
122             module along with its corresponding methods.
123              
124             =head2 cleanup( $file_age_in_days )
125              
126             =head2 cleanup()
127              
128             Spreadsheet::Read::Ingester->cleanup(0);
129              
130             Deletes stored files from the user's application data directory. Takes an
131             optional argument giving the minimum age, in days, that a file must have
132             before it is deleted. Defaults to 30 days. Passing a value of 0 deletes all
133             files.
134              
135             =head1 REQUIRES
136              
137             =over 4
138              
139             =item * L<File::Signature|File::Signature>
140              
141             =item * L<File::Spec|File::Spec>
142              
143             =item * L<File::UserConfig|File::UserConfig>
144              
145             =item * L<Spreadsheet::Read|Spreadsheet::Read>
146              
147             =item * L<Storable|Storable>
148              
149             =item * L<strict|strict>
150              
151             =item * L<warnings|warnings>
152              
153             =back
154              
155             =for :stopwords cpan testmatrix url annocpan anno bugtracker rt cpants kwalitee diff irc mailto metadata placeholders metacpan
156              
157             =head1 SUPPORT
158              
159             =head2 Perldoc
160              
161             You can find documentation for this module with the perldoc command.
162              
163             perldoc Spreadsheet::Read::Ingester
164              
165             =head2 Websites
166              
167             The following websites have more information about this module, and may be of help to you. As always,
168             in addition to those websites please use your favorite search engine to discover more resources.
169              
170             =over 4
171              
172             =item *
173              
174             MetaCPAN
175              
176             A modern, open-source CPAN search engine, useful to view POD in HTML format.
177              
178             L<https://metacpan.org/release/Spreadsheet-Read-Ingester>
179              
180             =back
181              
182             =head2 Source Code
183              
184             The code is open to the world, and available for you to hack on. Please feel free to browse it and play
185             with it, or whatever. If you want to contribute patches, please send me a diff or prod me to pull
186             from your repository :)
187              
188             L<https://github.com/sdondley/Spreadsheet-Read-Ingester>
189              
190             git clone git://github.com/sdondley/Spreadsheet-Read-Ingester.git
191              
192             =head1 BUGS AND LIMITATIONS
193              
194             You can make new bug reports, and view existing ones, through the
195             web interface at L<https://github.com/sdondley/Spreadsheet-Read-Ingester/issues>.
196              
197             =head1 INSTALLATION
198              
199             See perlmodinstall for information and options on installing Perl modules.
200              
201             =head1 SEE ALSO
202              
203             L<Spreadsheet::Read>
204              
205             =head1 AUTHOR
206              
207             Steve Dondley <s@dondley.com>
208              
209             =head1 COPYRIGHT AND LICENSE
210              
211             This software is copyright (c) 2019 by Steve Dondley.
212              
213             This is free software; you can redistribute it and/or modify it under
214             the same terms as the Perl 5 programming language system itself.
215              
216             =cut