File Coverage

blib/lib/Plack/App/RDF/LinkedData.pm
Criterion Covered Total %
statement 10 12 83.3
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 14 16 87.5


line stmt bran cond sub pod time code
1             package Plack::App::RDF::LinkedData;
2 1     1   9 use strict;
  1         4  
  1         41  
3 1     1   9 use warnings;
  1         2  
  1         43  
4 1     1   10 use parent qw( Plack::Component );
  1         3  
  1         9  
5 1     1   1695 use RDF::LinkedData;
  0            
  0            
6             use URI::NamespaceMap;
7             use Plack::Request;
8              
9             =head1 NAME
10              
11             Plack::App::RDF::LinkedData - A Plack application for running RDF::LinkedData
12              
13             =head1 VERSION
14              
15             Version 0.99_02
16              
17             =cut
18              
19             our $VERSION = '0.99_02';
20              
21              
22             =head1 SYNOPSIS
23              
24             my $linkeddata = Plack::App::RDF::LinkedData->new();
25             $linkeddata->configure($config);
26             my $rdf_linkeddata = $linkeddata->to_app;
27              
28             builder {
29             enable "Head";
30             enable "ContentLength";
31             enable "ConditionalGET";
32             $rdf_linkeddata;
33             };
34              
35             =head1 DESCRIPTION
36              
37             This module sets up a basic Plack application to use
38             L<RDF::LinkedData> to serve Linked Data, while making sure it does
39             follow best practices for doing so. See the README for quick start,
40             the gory details are here.
41              
42             =head1 MAKE IT RUN
43              
44             =head2 Quick setup for a demo
45              
46             =head3 One-liner
47              
48             It is possible to make it run with a single command line, e.g.:
49              
50             PERLRDF_STORE="Memory;path/to/some/data.ttl" plackup -host localhost script/linked_data.psgi
51              
52             This will start a server with the default config on localhost on port
53             5000, so the URIs you're going to serve from the file data.ttl will have
54             to have a base URI C<http://localhost:5000/>.
55              
56             =head3 Using perlrdf command line tool
57              
58             A slightly longer example requires L<App::perlrdf>, but sets up a
59             persistent SQLite-based triple store, parses a file and gets the
60             server with the default config running:
61              
62             export PERLRDF_STORE="DBI;mymodel;DBI:SQLite:database=rdf.db"
63             perlrdf make_store
64             perlrdf store_load path/to/some/data.ttl
65             plackup -host localhost script/linked_data.psgi
66              
67             =head2 Configuration
68              
69             To configure the system for production use, create a configuration
70             file C<rdf_linkeddata.json> that looks something like:
71              
72             {
73             "base_uri" : "http://localhost:3000/",
74             "store" : {
75             "storetype" : "Memory",
76             "sources" : [ {
77             "file" : "/path/to/your/data.ttl",
78             "syntax" : "turtle"
79             } ]
80              
81             },
82             "endpoint": {
83             "html": {
84             "resource_links": true
85             }
86             },
87             "expires" : "A86400" ,
88             "cors": {
89             "origins": "*"
90             },
91             "void": {
92             "pagetitle": "VoID Description for my dataset"
93             },
94             "fragments" : { "fragments_path" : "/fragments" }
95             }
96              
97             In your shell set
98              
99             export RDF_LINKEDDATA_CONFIG=/to/where/you/put/rdf_linkeddata.json
100              
101             Then, figure out where your install method installed the
102             C<linked_data.psgi> script, e.g. by using locate. If it was installed
103             in C</usr/local/bin>, go:
104              
105             plackup /usr/local/bin/linked_data.psgi --host localhost --port 3000
106              
107             The C<endpoint>-part of the config sets up a SPARQL Endpoint. This requires
108             the L<RDF::Endpoint> module, which is recommended by this module. To
109             use it, it needs to have some config, but will use defaults.
110              
111             It is also possible to set an C<expires> time. This needs
112             L<Plack::Middleware::Expires> and uses Apache C<mod_expires> syntax,
113             in the example above, it will set an expires header for all resources
114             to expire after 1 day of access.
115              
116             The C<cors>-part of the config enables Cross-Origin Resource
117             Sharing, which is a W3C Recommendation for relaxing security
118             constraints to allow data to be shared across domains. In most cases,
119             this is what you want when you are serving open data, but in some
120             cases, notably intranets, this should be turned off by removing this
121             part.
122              
123             The C<void>-part generates some statistics and a description of the
124             dataset, using RDF::Generator::Void. It is strongly recommended to
125             install and run that, but it can take some time to generate, so you
126             may have to set the detail level.
127              
128             Finally, C<fragments> add support for Triple Pattern Fragments, a
129             work-in-progress. It is a more lightweight but less powerful way to
130             query RDF data than SPARQL. If you have this, it is recommended to
131             have CORS enabled and required to have at least a minimal VoID setup.
132              
133             Note that in some environments, for example if the Plack server is
134             dynamically configured, and/or behind a proxy server, the server
135             may fail to bind to the address you give it as hostname. In this case,
136             it is wise to allow the server to bind to any public IP address,
137             i.e. set the host name to 0.0.0.0.
138              
139             =head2 Details of the implementation
140              
141             This server is a minimal Plack-script that should be sufficient for
142             most linked data usages, and serve as an example for most others.
143              
144             A minimal example of the required config file is provided above. There
145             are longer examples in the distribution, which are used to run
146             tests. In the config file, there is a C<store> parameter, which must
147             contain the L<RDF::Trine::Store> config hashref. It may also have a
148             C<base_uri> URI and a C<namespaces> hashref which may contain prefix -
149             URI mappings to be used in serializations. Certain namespaces, namely
150             RDF, VoID, Hydra, DC Terms and XML Schema are added by the module and
151             do not need to be declared.
152              
153              
154             Note that this is a server that can only serve URIs of hosts you
155             control, it is not a general purpose Linked Data manipulation tool,
156             nor is it an implementation of Linked Data Platform or the Linked Data
157             API.
158              
159             The configuration is done using L<Config::ZOMG> and all its features
160             can be used. Importantly, you can set the C<RDF_LINKEDDATA_CONFIG>
161             environment variable to point to the config file you want to use. See
162             also L<Catalyst::Plugin::ConfigLoader> for more information on how to
163             use this config system.
164              
165             =head2 Behaviour
166              
167             The following documentation is adapted from RDF::LinkedData::Apache,
168             which preceded this module.
169              
170             =over 4
171              
172             =item * C<http://host.name/rdf/example>
173              
174             Will return an HTTP 303 redirect based on the value of the request's
175             Accept header. If the Accept header contains a recognized RDF media
176             type or there is no Accept header, the redirect will be to
177             C<http://host.name/rdf/example/data>, otherwise to
178             C<http://host.name/rdf/example/page>. If the URI has a foaf:homepage
179             or foaf:page predicate, the redirect will in the latter case instead
180             use the first encountered object URI.
181              
182             =item * C<http://host.name/rdf/example/data>
183              
184             Will return a bounded description of the C<http://host.name/rdf/example>
185             resource in an RDF serialization based on the Accept header. If the Accept
186             header does not contain a recognized media type, RDF/XML will be returned.
187              
188             =item * C<http://host.name/rdf/example/page>
189              
190             Will return an HTML description of the C<http://host.name/rdf/example>
191             resource including RDFa markup, or, if the URI has a foaf:homepage or
192             foaf:page predicate, a 301 redirect to that object.
193              
194             =back
195              
196             If the RDF resource for which data is requested is not the subject of any RDF
197             triples in the underlying triplestore, the /page and /data redirects will not take
198             place, and a HTTP 404 (Not Found) will be returned.
199              
200             The HTML description of resources will be enhanced by having metadata
201             about the predicate of RDF triples loaded into the same
202             triplestore. Currently, only a C<rdfs:label>-predicate will be used
203             for a title, as in this version, generation of HTML is done by
204             L<RDF::RDFa::Generator>.
205              
206             =head2 Endpoint Usage
207              
208             As stated earlier, this module can set up a SPARQL Endpoint for the
209             data using L<RDF::Endpoint>. Often, that's what you want, but if you
210             don't want your users to have that kind of power, or you're worried it
211             may overload your system, you may turn it off by simply having no
212             C<endpoint> section in your config. To use it, you just need to have
213             an C<endpoint> section with something in it, it doesn't really matter
214             what, as it will use defaults for everything that isn't set.
215              
216             L<RDF::Endpoint> is recommended by this module, but as it is optional,
217             you may have to install it separately. It has many configuration
218             options, please see its documentation for details.
219              
220             You may also need to set the C<RDF_ENDPOINT_SHAREDIR> variable to
221             wherever the endpoint shared files are installed to. These are some
222             CSS and Javascript files that enhance the user experience. They are
223             not strictly necessary, but it sure makes it pretty! L<RDF::Endpoint>
224             should do the right thing, though, so it shouldn't be necessary.
225              
226             Finally, note that while L<RDF::Endpoint> can serve these files for
227             you, this module doesn't help you do that. That's mostly because this
228             author thinks you should serve them using some other parts of the
229             deployment stack. For example, to use Apache, put this in your Apache
230             config in the appropriate C<VirtualHost> section:
231              
232              
233             Alias /js/ /path/to/share/www/js/
234             Alias /favicon.ico /path/to/share/www/favicon.ico
235             Alias /css/ /path/to/share/www/css/
236              
237             =head2 VoID Generator Usage
238              
239             Like a SPARQL Endpoint, this is something most users would want. In
240             fact, it is an even stronger recommendation than an endpoint. To
241             enable it, you must have L<RDF::Generator::Void> installed, and just
242             anything in the config file to enable it, like in the configuration example above.
243              
244             You can set several things in the config, the property attributes of
245             L<RDF::Generator::Void> can all be set there somehow. You can also set
246             C<pagetitle>, which sets the title for the RDFa page that can be
247             generated. Moreover, you can set titles in several languages for the
248             dataset using C<titles> as the key, pointing to an arrayref with
249             titles, where each title is a two element arrayref, where the first
250             element is the title itself and the second is the language for that
251             title.
252              
253             Please refer to the L<RDF::Generator::Void> for more details about
254             what can be set, and the C<rdf_linkeddata_void.json> test config in
255             the distribution for example.
256              
257             By adding an C<add_void> config key, you can pass a file to the
258             generator so that arbitrary RDF can be added to the VoID
259             description. It will check the last modification time of the file and
260             only update the VoID description if it has been modified. This is
261             useful since much of the VoID description is expensive to
262             compute. To use it, the configuration would in JSON look something
263             like this:
264              
265             "add_void": { "file": "/data/add.ttl", "syntax": "turtle" }
266              
267             where C<file> is the full path to RDF that should be added and
268             C<syntax> is needed by the parser to parse it.
269              
270             Normally, the VoID description is cached in RAM and the store ETag is
271             checked on every request to see if the description must be
272             regenerated. If you use the C<add_void> feature, you can force
273             regeneration on the next request by touching the file.
274              
275             =head1 FEEDBACK WANTED
276              
277             Please contact the author if this documentation is unclear. It is
278             really very simple to get it running, so if it appears difficult, this
279             documentation is most likely to blame.
280              
281              
282              
283             =head1 METHODS
284              
285             You would most likely not need to call these yourself, but rather use
286             the C<linked_data.psgi> script supplied with the distribution.
287              
288             =over
289              
290             =item C<< configure >>
291              
292             This is the only method you would call manually, as it can be used to
293             pass a hashref with configuration to the application.
294              
295             =cut
296              
297             sub configure {    # Store the given configuration on the object; returns $self so calls can be chained.
298             my $self = shift;
299             $self->{config} = shift;    # Kept by reference: no copy and no validation is done here.
300             return $self;
301             }
302              
303              
304             =item C<< prepare_app >>
305              
306             Will be called by Plack to set the application up.
307              
308             =item C<< call >>
309              
310             Will be called by Plack to process the request.
311              
312             =back
313              
314             =cut
315              
316              
317             sub prepare_app {    # Build the RDF::LinkedData object from the stored config and register namespaces.
318             my $self = shift;
319             my $config = $self->{config};    # Whatever configure() stored on the object.
320             $self->{linkeddata} = RDF::LinkedData->new(store => $config->{store},    # Each config section is passed through unchanged.
321             endpoint_config => $config->{endpoint},
322             void_config => $config->{void},
323             fragments_config => $config->{fragments},
324             base_uri => $config->{base_uri}
325             );
326             $self->{linkeddata}->namespaces(URI::NamespaceMap->new($config->{namespaces})) if ($config->{namespaces});    # Only set when the config has a true 'namespaces' entry.
327             # Ensure that certain namespaces are always declared
328             $self->{linkeddata}->guess_namespaces('rdf', 'dc', 'xsd', 'void');
329             $self->{linkeddata}->add_namespace_mapping(hydra => 'http://www.w3.org/ns/hydra/core#');    # hydra is mapped explicitly rather than guessed.
330             }
331              
332             sub call {    # Handle one request: check the method, redirect the well-known VoID URI, else delegate to RDF::LinkedData.
333             my($self, $env) = @_;
334             my $req = Plack::Request->new($env);
335             my $uri = $req->uri;
336             my $ld = $self->{linkeddata};    # Created in prepare_app.
337             my $endpoint_path;    # Left undef when the model reports no endpoint.
338             if ($ld->has_endpoint) {
339             $endpoint_path = $ld->endpoint_config->{endpoint_path};
340             }
341             unless (($req->method eq 'GET') || ($req->method eq 'HEAD')    # GET and HEAD are always allowed.
342             || (($req->method eq 'POST') && defined($endpoint_path) && ($uri =~ m|$endpoint_path$|))) {    # POST only when the URI ends in the endpoint path. NOTE(review): the path is interpolated as a regex unquoted and matched against the whole URI - confirm intended.
343             return [ 405, [ 'Content-type', 'text/plain' ], [ 'Method not allowed' ] ];
344             }
345              
346             if (($uri->path eq '/.well-known/void') && ($ld->has_void)) {    # Redirect the well-known VoID URI when VoID is enabled.
347             return [ 302, [ 'Location', $ld->base_uri . '/' ], [ '' ] ];    # NOTE(review): appends '/' to base_uri; may double the slash if base_uri already ends in one - confirm.
348             }
349              
350             if ($uri->as_iri =~ m!^(.+?)/?(page|data)$!) {    # URI ends in "page" or "data". NOTE(review): the slash before it is optional, so e.g. ".../metadata" also matches - confirm intended.
351             $uri = URI->new($1);    # Continue with the URI minus that suffix.
352             $ld->type($2);    # Record which of page/data was requested.
353             }
354             $ld->request($req);
355             return $ld->response($uri)->finalize;    # Return the finalized form of the response object built by the model.
356             }
357              
358             1;
359              
360              
361              
362             =head1 AUTHOR
363              
364             Kjetil Kjernsmo, C<< <kjetilk@cpan.org> >>
365              
366             =head1 COPYRIGHT & LICENSE
367              
368             Copyright 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 Kjetil Kjernsmo
369              
370             This program is free software; you can redistribute it and/or modify it
371             under the same terms as Perl itself.
372              
373              
374             =cut