File Coverage

blib/lib/Data/Downloader.pm
Criterion Covered Total %
statement 1 3 33.3
branch n/a
condition n/a
subroutine 1 1 100.0
pod n/a
total 2 4 50.0


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Data::Downloader -- Download and organize files using RSS feeds and templates.
4              
5             =head1 SYNOPSIS
6              
7             use Data::Downloader -init_logging => "INFO";
8              
9             my $repo = Data::Downloader::Repository->new(name => "ozone");
10             $repo->load(speculative => 1) or die "ozone repository not configured";
11              
12             for my $feed (@{ $repo->feeds }) {
13             $feed->refresh;
14             }
15              
16             for my $file (@{ $repo->files }) {
17             $file->download;
18             }
19              
20             =head1 DESCRIPTION
21              
22             Data::Downloader allows one to download and maintain local repositories
23             of files. File metadata may be obtained from RSS or Atom feeds. Files
24             are stored using MD5 sums, and symbolic links can be created based on
25             the metadata for the files.
26              
27             A command line version of Data::Downloader, L<dado>, is also available.
28              
29             Data::Downloader uses an SQLite L<database|/SCHEMA>
30             to store both the L<configuration|Data::Downloader::Config>, as well
31             as information about the files and
32             symbolic links stored
33             in a repository.
34              
35             Parameters may be sent when updating feeds;
36             parameters replace tokens in URLs for the RSS/Atom feeds, just
37             as in an OpenSearch URL template.
38              
39             =head1 BACKGROUND
40              
41             Several efforts are underway to extend Atom and RSS as a mechanism
42             for distribution of scientific data. For example, datacasting provides a versatile response format
43             as well as a client capable of graphically oriented searches for data.
44             ESIP Discovery Services (e.g. L)
45             are working on specifications for server-side filtering of data, based
46             on the OpenSearch specification.
47              
48             In addition, standards such as MRSS (Media RSS) and
49             GeoRSS provide ways to represent structured
50             metadata for resources which are often downloaded and organized on a local disk.
51              
52             In addition to reading feeds, Data::Downloader may be used as an
53             LRU cache which may
54             be populated directly without querying Atom/RSS feeds.
55              
56             =head1 EXAMPLE
57              
58             This is an example of subscribing to flickr's mrss feed to download
59             images. For more examples, please see L.
60              
61             my $images = Data::Downloader::Repository->new(
62             name => "images",
63             storage_root => "/usr/local/datastore/data",
64             cache_strategy => "Keep",
65             feeds => [
66             {
67             name => "flickr",
68             feed_template => 'http://api.flickr.com/services/feeds/photos_public.gne?tags=<tags>&lang=en-us&format=rss_200',
69             file_source => {
70             url_xpath => 'media:content/@url',
71             filename_xpath => 'media:content/@url',
72             filename_regex => '/([^/]*)$',
73             },
74             metadata_sources => [
75             { name => 'date_taken', xpath => 'dc:date.Taken' },
76             { name => 'tags', xpath => 'media:category' },
77             ],
78             },
79             ],
80             metadata_transformations => [
81             {
82             input => "tags",
83             output => "tag",
84             function_name => "split",
85             },
86             ],
87             linktrees => [
88             {
89             root => "/usr/local/datastore/by_tag",
90             condition => undef,
91             path_template => "<tag>"
92             },
93             {
94             root => "/usr/local/datastore/by_date",
95             condition => undef,
96             path_template => "<date_taken:%Y/%m/%d>"
97             },
98             ],
99             );
100              
101             $images->load(speculative => 1) or $images->save;
102              
103             for my $feed ($images->feeds) {
104             $feed->refresh(tags => "apples");
105             }
106              
107             $images->download_all;
108              
109             =head1 SEE ALSO
110              
111             L,
112             L,
113             L,
114             L,
115             L,
116             L,
117             L,
118             L
119              
120             =cut
121              
122             package Data::Downloader;
123 6     6   18502 use Rose::DB::Object::Loader;
  0            
  0            
124             use Lingua::EN::Inflect qw/def_noun/;
125             use Log::Log4perl qw(:easy);
126              
127             use Data::Downloader::DB;
128             use Data::Downloader::DB::Object;
129             use Data::Downloader::DB::Object::Cached;
130             use Data::Downloader::Config;
131             use Data::Downloader::MetadataPivot;
132             use Data::Downloader::FileMetadata;
133             use strict;
134              
135             def_noun "metadatum" => "metadata";
136              
137             our $VERSION = '0.9907';
138             our $db;
139             our $useProgressBars; # set by import() when -use_progress_bars is passed (to turn on Smart::Comments)
140             our $setupDone;
141              
# Class import hook: performs the one-time setup of the module.
#
# Only the first call does any work (later calls return immediately because
# $setupDone is set). The first call:
#   * connects $db to the "main" Data::Downloader::DB database,
#   * initializes Log4perl when an argument matching -init_logging is given;
#     a bare DEBUG/INFO/WARN/ERROR/FATAL argument selects the root level
#     (default "INFO"),
#   * sets $useProgressBars when an argument matching -use_progress_bars
#     is given,
#   * runs _initialize_db() when the database file is missing or empty,
#   * calls _setup_classes().
#
# Arguments: the class name followed by the import list from the `use` line.
sub import {
    my ($class, @args) = @_;

    return if $setupDone;
    $setupDone = 1;

    $db = Data::Downloader::DB->new("main");

    if (grep { /-init_logging/ } @args) {
        # First argument that is exactly a level name, else fall back to INFO.
        my $level = ( grep { /^(DEBUG|INFO|WARN|ERROR|FATAL)$/ } @args )[0] || "INFO";
        # When HARNESS_ACTIVE is set every log line is prefixed with '#'
        # (presumably so it reads as a comment under a test harness).
        my $init = <<"EOT";
log4perl.rootLogger = $level, Screen
log4perl.appender.Screen = Log::Log4perl::Appender::ScreenColoredLevels
log4perl.appender.Screen.layout = Log::Log4perl::Layout::PatternLayout
log4perl.appender.Screen.layout.ConversionPattern = @{[ $ENV{HARNESS_ACTIVE} ? '#' : '' ]} [%-5p] %d %F{1} (%L) %m %n
EOT
        Log::Log4perl::init(\$init);
    }

    $useProgressBars = 1 if grep { /-use_progress_bars/ } @args;

    # -s is false for a missing or zero-length file: create the schema then.
    unless (-s $db->database) {
        INFO("initializing database ".$db->database."\n");
        _initialize_db();
    }

    _setup_classes();
}
167              
# Create the database schema by executing the SQL kept in this file's
# __DATA__ section against $db.
#
# The section is read in chunks terminated by ';' (via a localized $/), one
# SQL statement per chunk. A CREATE TRIGGER statement contains ';' inside its
# body, so for those the following chunks are appended until one contains END
# (or the data runs out). Chunks without any word character are skipped.
#
# Takes no arguments. Dies with the statement text and the DBI error string
# if a statement fails to execute.
sub _initialize_db {
    local $/ = ';';
    while (defined(my $stmt = <DATA>)) {
        if ($stmt =~ /CREATE TRIGGER/) {
            # Always pull at least one more chunk; stop after the chunk that
            # closes the trigger body, or at end of data.
            while (defined(my $more = <DATA>)) {
                $stmt .= $more;
                last if $more =~ /END/;
            }
        }
        TRACE("SQL:\n$stmt\n");
        next unless $stmt =~ /\w/;
        $db->dbh->do($stmt) or die "failed to execute '$stmt'\n\n".$db->dbh->errstr;
    }
}
182              
# Generate the Rose::DB::Object classes for every table in $db.
#
# Two loaders share one convention manager (singular table names, plurals
# computed by Lingua::EN::Inflect::PL) and the "Data::Downloader" class
# prefix:
#   * the configuration tables listed in @config_tables get classes based on
#     Data::Downloader::DB::Object::Cached,
#   * all other tables get classes based on Data::Downloader::DB::Object.
# Both set error_mode('return') on the generated metadata.
#
# For each generated class, a module file of the same name is then loaded if
# one can be located (any error other than "Can't locate" is fatal), and an
# init_db method returning the cached "main" database is installed. Finally
# do_setup is called on the MetadataPivot and FileMetadata classes.
#
# Takes no arguments. Dies (LOGDIE) if the dynamic loader produces no classes.
sub _setup_classes {
    # Package variable, so it stays reachable as $Data::Downloader::cm.
    our $cm = Rose::DB::Object::ConventionManager->new();
    $cm->tables_are_singular(1);
    $cm->singular_to_plural_function(\&Lingua::EN::Inflect::PL);

    my $loader_dynamic = Rose::DB::Object::Loader->new(
        base_class         => "Data::Downloader::DB::Object",
        db                 => $db,
        db_class           => "Data::Downloader::DB",
        class_prefix       => "Data::Downloader",
        convention_manager => $cm,
        post_init_hook     => sub { shift->error_mode('return'); },
        # TODO use log4perl for error mode somehow
    );

    my $loader_cached = Rose::DB::Object::Loader->new(
        base_class         => "Data::Downloader::DB::Object::Cached",
        db                 => $db,
        db_class           => "Data::Downloader::DB",
        class_prefix       => "Data::Downloader",
        convention_manager => $cm,
        post_init_hook     => sub { shift->error_mode('return'); },
    );

    my @config_tables = qw/repository feed disk feed_parameter file_source metadata_source metadata_transformation/;
    # Report the database via $db->database, as import() does; the DBI handle
    # returned by $db->dbh is not the object that carries that accessor.
    my @classes = $loader_dynamic->make_classes(exclude_tables => \@config_tables)
        or LOGDIE("Error: unable to load classes from ".$db->database);

    push @classes, $loader_cached->make_classes(include_tables => \@config_tables);

    for my $class (@classes) {
        eval "use $class";
        # Save the error right away so nothing below can overwrite $@.
        my $load_error = $@;
        no strict 'refs';
        *{"${class}::init_db"} = sub { Data::Downloader::DB->new_or_cached("main") };
        # A missing module file is fine: the generated class is used as-is.
        die "Errors using $class : $load_error"
            if $load_error && $load_error !~ /Can't locate/;
    }

    Data::Downloader::MetadataPivot->do_setup;
    Data::Downloader::FileMetadata->do_setup;
}
223              
224             1;
225              
226             <<"=head1 SCHEMA"; # my trick to put __DATA__ into pod
227              
228             =head1 SCHEMA
229              
230             __DATA__