File Coverage

blib/lib/SeeAlso/Source/BeaconAggregator.pm
Criterion Covered Total %
statement 232 326 71.1
branch 120 248 48.3
condition 25 49 51.0
subroutine 32 33 96.9
pod 13 20 65.0
total 422 676 62.4


line stmt bran cond sub pod time code
1             package SeeAlso::Source::BeaconAggregator;
2 13     13   88727 use strict;
  13         13  
  13         277  
3 13     13   50 use warnings;
  13         14  
  13         267  
4              
5             BEGIN {
6 13     13   32 use Exporter ();
  13         60  
  13         239  
7 13     13   38 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  13         11  
  13         995  
8 13     13   19 $VERSION = '0.2_92';
9 13         86 @ISA = qw(Exporter);
10             #Give a hoot don't pollute, do not export more than needed by default
11 13         15 @EXPORT = qw();
12 13         11 @EXPORT_OK = qw();
13 13         236 %EXPORT_TAGS = ();
14             }
15              
16 13     13   43 use vars qw($DATA_VERSION);
  13         14  
  13         491  
17             $DATA_VERSION = 2;
18              
19 13     13   5028 use SeeAlso::Response;
  13         692647  
  13         331  
20 13     13   77 use base ("SeeAlso::Source");
  13         14  
  13         5502  
21              
22 13     13   364048 use DBI qw(:sql_types);
  13         151774  
  13         4147  
23 13     13   5488 use HTTP::Date;
  13         32581  
  13         616  
24              
25 13     13   63 use CGI;
  13         15  
  13         60  
26 13     13   438 use Carp;
  13         16  
  13         22531  
27              
28             #################### main pod documentation begin ###################
29             ## Below is the stub of documentation for your module.
30             ## You better edit it!
31              
32             =head1 NAME
33              
34             SeeAlso::Source::BeaconAggregator - Beacon files as source for SeeAlso::Server
35              
36             =head1 SYNOPSIS
37              
38             use CGI;
39             use SeeAlso::Identifier::ISSN;
40             use SeeAlso::Server;
41             use SeeAlso::Source::BeaconAggregator;
42              
43             my $srcdescription = {
44             "ShortName" => "TestService", # 16 Characters
45             "LongName" => "Sample SeeAlso Beacon Aggregator", # 48 characters
46             # "Description" => "The following services are contained: ...", # 1024 Characters
47             "DateModfied" => "...",
48             _dont_advertise => 1,
49             };
50              
51             my $CGI = CGI->new(); binmode(STDOUT, ":utf8");
52              
53             my $source = SeeAlso::Source::BeaconAggregator->new(
54             'file' => "/path/to/existing/database",
55             'identifierClass' => SeeAlso::Identifier::ISSN->new(),
56             'verbose' => 1,
57             'description' => $srcdescription,
58             );
59              
60             my $server = SeeAlso::Server->new (
61             'cgi' => $CGI,
62             xslt => "/client/showservice.xsl", # => +
63             clientbase => "/client/", # =>
64             expires => "+2d",
65             );
66              
67             my $rawid = $CGI->param('id') || "";
68             my $identifier = $rawid ? SeeAlso::Identifier::ISSN->new($rawid) : "";
69             my $result = $server->query($source, $identifier ? $identifier->value() : undef);
70             print $result;
71              
72              
73             =head1 DESCRIPTION
74              
75             This Module allows a collection of BEACON files (cf. http://de.wikipedia.org/wiki/Wikipedia:BEACON)
76             to be used as SeeAlso::Source (probably in the context of an SeeAlso::Server application).
77             Therefore it implements the four methods documented in SeeAlso::Source
78              
79             The BEACON files (lists of non-local identifiers of a certain type documenting the coverage of a given
80             online database plus means for access) are imported by the methods provided by
81             SeeAlso::Source::BeaconAggregator::Maintenance.pm, usually by employing the script sasbactrl.pl
82             as command line client.
83              
84             Serving other formats than SeeAlso or providing a BEACON file with respect to this
85             SeeAlso service is achieved by using SeeAlso::Source::BeaconAggregator::Publisher.
86              
87              
88             =head1 USAGE
89              
90              
91             =head2 Class methods
92              
93             =cut
94              
95             our %BeaconFields = ( # in den BEACON-Formaten definierte Felder
96             FORMAT => ['VARCHAR(16)', 1], # Pflicht
97             TARGET => ['VARCHAR(1024)', 1], # Pflicht, enthaelt {ID}
98             # PND-BEACON
99             VERSION => ['VARCHAR(16)'], # Only V0.1 supported
100             FEED => ['VARCHAR(255)'],
101             CONTACT => ['VARCHAR(63)'],
102             INSTITUTION => ['VARCHAR(1024)'],
103             ISIL => ['VARCHAR(64)'],
104             DESCRIPTION => ['VARCHAR(2048)'],
105             UPDATE => ['VARCHAR(63)'],
106             TIMESTAMP => ['INTEGER'],
107             REVISIT => ['INTEGER'],
108             # BEACON
109             EXAMPLES => ['VARCHAR(255)'],
110             MESSAGE => ['VARCHAR(255)'], # enthaelt {hits}
111             ONEMESSAGE => ['VARCHAR(255)'],
112             SOMEMESSAGE => ['VARCHAR(255)'],
113             PREFIX => ['VARCHAR(255)'],
114             # NEWER
115             COUNT => ['VARCHAR(255)'],
116             REMARK => ['VARCHAR(2048)'],
117             # WInofficial
118             NAME => ['VARCHAR(255)'],
119             # Experimental
120             ALTTARGET => ['VARCHAR(1024)'],
121             IMGTARGET => ['VARCHAR(1024)'],
122             );
123              
124              
125              
126             =head3 beaconfields ( [ $what ] )
127              
128             (Class method) Called without parameter returns an array of all valid field names
129             for meta headers
130              
131             @meta_supported = SeeAlso::Source::BeaconAggregator->beaconfields();
132              
133             With given parameter $what in scalar context returns the column
134             name of the database for the abstract field name. In array context
135             additionally the column type and optional flag designating a
136             mandatory entry are returned.
137              
138             $internal_col = SeeAlso::Source::BeaconAggregator->beaconfields('FORMAT');
139              
140             ($internal_col, $specs, $mandatory)
141             = SeeAlso::Source::BeaconAggregator->beaconfields('FORMAT');
142              
143             Fields are:
144              
145             # mandatory
146             FORMAT, TARGET
147             # as of BEACON spec
148             VERSION, FEED, TIMESTAMP, REVISIT, UPDATE
149             CONTACT, INSTITUTION, ISIL,
150             # from the experimental BEACON spec
151             MESSAGE, ONEMESSAGE, SOMEMESSAGE
152             PREFIX, EXAMPLES
153             # later additions
154             COUNT, REMARK
155             # current practise
156             NAME
157             # experimental extension "Konkordanzformat"
158             ALTTARGET, IMGTARGET
159              
160              
161             =cut
162              
163             sub beaconfields {
164 499     499 1 2264 my ($class, $what) = @_;
165 499 100       992 return keys %BeaconFields unless $what;
166 487 100       1626 return undef unless $BeaconFields{$what};
167 424 100       844 return wantarray ? ("bc$what", @{$BeaconFields{$what}}) : "bc$what";
  189         660  
168             }
169              
170              
171             our %OSDElements = ( # fuer OpensearchDescription deklarierte Felder
172             "ShortName" => "*", # <= 16 Zeichen, PFLICHT!
173             "Description" => "*", # <= 1024 Zeichen, PFLICHT!
174              
175             "Contact" => "*", # "nackte" Mailadresse user@domain, optional.
176             "Tags" => "*", # Liste von Einzelworten, <= 256 Zeichen, optional.
177             "LongName" => "*", # <= 48 Zeichen, optional.
178             "Developer" => "*", # <= 64 Zeichen, optional.
179             "Attribution" => "*", # <= 256 Zeichen, optional.
180             "SyndicationRight" => "open", # open, limited, private, closed
181             "AdultContent" => "false", # false/no/0: false, sonst: true
182              
183             "Language" => "*",
184             "InputEncoding" => "UTF-8",
185             "OutputEncoding" => "UTF-8",
186             # "dcterms:modified" => "",
187             # repeatable fields w/o contents, treated specially
188             # "Url" => {type => "*", template => "*"},
189             # "Query" => {role => "example", searchTerms => "*"},
190             # Special for the SeeAlso::Family
191             "Example" => "*",
192             "Examples" => "*",
193             "BaseURL" => "*", # Auto
194             "DateModified" => "*", # alias for dcterms:modified
195             "Source" => "*",
196             );
197              
198              
199             =head3 osdKeys ( [ $what ] )
200              
201             (Class method) Called without parameter returns an array of all valid element names
202             for the OpenSearchDescription:
203              
204             @meta_names = SeeAlso::Source::BeaconAggregator->osdKeys();
205              
206             With given parameter $what returns the value for the given OpenSearchDescription
207             element:
208              
209             $osd_value = SeeAlso::Source::BeaconAggregator->beaconfields('LongName');
210              
211             OSD elements are
212              
213             ShortName, Description
214             Contact, Tags, LongName, Developer, Attribution, SyndicationRight, AdultContent
215             Language, InputEncoding, OutputEncoding
216             # special for SeeAlso::Family
217             Example, Examples, BaseURL, DateModified, Source
218              
219             =cut
220              
221             sub osdKeys {
222 17     17 1 1987 my ($class, $what) = @_;
223 17 100       43 return keys %OSDElements unless $what;
224 16 100       473 return undef unless $OSDElements{$what};
225 12         34 return $OSDElements{$what};
226             }
227              
228              
229             =head2 SeeAlso::Source methods
230              
231             =head3 new( %accessor [, %options ] )
232              
233             Creates the SeeAlso::Source::BeaconAggregator object and connects to an existing
234             database previously created with the methods from
235             SeeAlso::Source::BeaconAggregator::Maintenance (currently SQLlite)
236              
237             Accessor options:
238              
239             =over 8
240              
241             =item dbh
242              
243             handle of a database already connected to
244              
245             =item dbroot
246              
247             optional path to prepend to dsn or file
248              
249             =item dsn
250              
251             directory name (directory contains the database file "-db"
252              
253             =item file
254              
255             full path of the database
256              
257             =back
258              
259             Other options:
260              
261             =over 8
262              
263             =item identifierClass
264              
265             contains an already instantiated object of that class
266              
267             =item verbose (0|1)
268              
269             =item description
270              
271             Hashref with options to be piped through to SeeAlso::Source
272              
273             =item aliasfilter
274              
275             Hashref with aliases to be filtered out from query results
276              
277             =item cluster
278              
279             dsn of a beacon source of identical identifier type giving a mapping (hash / altid)
280             e.g. invalidated identifiers -> current identifiers.
281              
282             When the identifier supplied for query() is mentioned in this table, the query will be
283             executed against the associated current identifier and all invalidated ones
284             (backward translation of forward translation).
285              
286             When not (the mapping might not necessarily include the identiy mapping),
287             the query behaves as if no "cluster" was given.
288              
289             For translation between different identifier schemes before querying,
290             use an appropriate SeeAlso::Identifier class.
291              
292              
293             =back
294              
295             Returns undef if unable to DBI->connect() to the database.
296              
297             =cut
298              
299             sub new {
300 20     20 1 12341 my ($class, %options) = @_;
301 20         61 my $self = {%options};
302 20         37 bless($self, $class);
303              
304 20 100       128 if ( $self->{dsn} ) {
305 18 50       106 croak("no special characters allowed for dsn") unless $self->{dsn} =~ /^[\w!,.{}-]+$/};
306              
307 20 50       68 if ( $self->{dbroot} ) {
308 0 0       0 return undef unless -d $self->{dbroot};
309 0 0       0 $self->{dbroot} .= "/" unless $self->{dbroot} =~ m!/$!;
310             };
311              
312 20         31 my $dbfile;
313 20 50       76 if ( $self->{dbh} ) { # called with handle...
    100          
    100          
314 0         0 return $self;
315             }
316             elsif ( $self->{dsn} ) {
317 18         59 $dbfile = $self->{dsn}."/".$self->{dsn}."-db";
318 18 50       46 (substr($dbfile, 0, 0) = $self->{dbroot}) if $self->{dbroot};
319             }
320             elsif ( $dbfile = $self->{file} ) {
321 1 50       5 if ( $self->{dbroot} ) {
322 0         0 substr($dbfile, 0, 0) = $self->{dbroot}};
323             };
324              
325 20 100       58 return undef unless $dbfile;
326              
327 19         148 my $dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", "", "",
328             {
329             # RaiseError => 1,
330             sqlite_unicode => 1,
331             });
332 19 50       107972 return undef unless $dbh;
333 19         47 $self->{dbh} = $dbh;
334              
335 19 50       50 if ( $self->{cluster} ) {
336 0         0 my $clusterfile = $self->{cluster}."/".$self->{cluster}."-db";
337 0 0       0 (substr($clusterfile, 0, 0) = $self->{dbroot}) if $self->{dbroot};
338 0 0       0 $dbh->do("ATTACH DATABASE '$clusterfile' AS cluster") or croak("error attaching cluster database '$clusterfile'");
339             };
340              
341 19         60 return $self;
342             }
343              
344              
345             =head3 description ()
346              
347             Inherited from SeeAlso::Source.
348              
349             =cut
350              
351             sub description {
352 4     4 1 1547 my $self = shift;
353 4 100       9 $self->enrichdescription() unless $self->{descriptioncached};
354 4         12 return $self->SUPER::description(@_);
355             }
356              
357             =head3 about ()
358              
359             Inherited from SeeAlso::Source.
360              
361             =cut
362              
363             sub about {
364 1     1 1 2245 my $self = shift;
365 1 50       5 $self->enrichdescription() unless $self->{descriptioncached};
366 1         7 return $self->SUPER::about(@_);
367             }
368              
369              
370             sub enrichdescription {
371 1     1 0 1 my ($self) = @_;
372 1         6 my $rawref = $self->OSDValues();
373 1         2 my %result;
374 1         4 foreach ( keys %$rawref ) {
375 0 0       0 next unless $rawref->{$_};
376 0 0       0 if ( ref($rawref->{$_}) ) { # List
377 0 0       0 if ( $_ =~ /^Example/ ) {
378 0         0 my @ary;
379 0         0 foreach my $item ( @{$rawref->{$_}} ) {
  0         0  
380 0 0       0 next unless $item;
381 0         0 my($i, $r) = split(/\s*\|\s*/, $item, 2);
382 0 0       0 next unless $i;
383 0 0       0 if ( $r ) {
384 0         0 push(@ary, {'id'=>$i, 'response'=>$r})}
385             else {
386 0         0 push(@ary, {'id'=>$i})}
387             }
388 0 0       0 $result{$_} = \@ary if @ary;
389             }
390             else {
391 0         0 $result{$_} = join(";\n", @{$rawref->{$_}})};
  0         0  
392             }
393             else { # Scalar
394 0 0       0 if ( $_ =~ /^Example/ ) {
395 0         0 my($i, $r) = split(/\s*\|\s*/, $rawref->{$_}, 2);
396 0 0       0 next unless $i;
397 0 0       0 if ( $r ) {
398 0         0 $result{$_} = [{'id'=>$i, 'response'=>$r}]}
399             else {
400 0         0 $result{$_} = [{'id'=>$i}]}
401             }
402             else {
403 0         0 $result{$_} = $rawref->{$_}};
404             }
405             };
406              
407              
408 1 50       4 if ( $self->{description} ) {
    50          
409 0         0 my %combined = (%result, %{$self->{description}});
  0         0  
410 0         0 $self->{description} = \%combined;
411             }
412             elsif ( %result ) {
413 0         0 $self->{description} = \%result};
414              
415 1         2 $self->{descriptioncached} = 1;
416             }
417              
418             ### Antworten fuer Anfragen als Format seealso
419              
420             =head3 set_aliasfilter ( @aliaslist )
421              
422             Init the hash with
423              
424             =cut
425              
426             sub set_aliasfilter {
427 2     2 1 4205 my ($self, @aliaslist) = @_;
428 2         4 $self->{'aliasfilter'} = { map { ($_, "") } @aliaslist };
  2         5  
429 2         6 return $self->{'aliasfilter'};
430             }
431              
432             =head3 query( [ $identifier] )
433              
434             Returns a SeeAlso::Response listing all matches to the given string or
435             SeeAlso::Identifier $identifier.
436              
437             =cut
438              
439             sub query { # SeeAlso-Simple response
440 11     11 1 23763 my ($self, $query) = @_;
441 11         18 my ($hash, $pretty, $canon) = $self->prepare_query($query);
442 11         25 my $response = SeeAlso::Response->new($canon);
443              
444 11         948 my $clusterid;
445 11 50       23 if ( $self->{cluster} ) {
446 0         0 my ($clusterh, $clusterexpl) = $self->stmtHdl("SELECT beacons.altid FROM cluster.beacons WHERE beacons.hash=? OR beacons.altid=? LIMIT 1;");
447 0 0       0 $self->stmtExplain($clusterexpl, $hash, $hash) if $ENV{'DBI_PROFILE'};
448 0         0 $clusterh->execute($hash, $hash);
449 0         0 while ( my $onerow = $clusterh->fetchrow_arrayref() ) {
450 0         0 $clusterid = $onerow->[0];}
451             }
452              
453             my ( $tfield, $afield, $mfield, $m1field, $msfield, $dfield, $nfield, $ifield)
454 11         16 = map{ scalar $self->beaconfields($_) }
  88         84  
455             # 6 7 8 9 10 11 12 13
456             qw(TARGET ALTTARGET MESSAGE ONEMESSAGE SOMEMESSAGE DESCRIPTION NAME INSTITUTION);
457             # 0 1 2 3 4 5
458             # 14 15
459 11         13 my ($sth, $sthexpl);
460 11 50       14 if ( $clusterid ) { # query IN cluster (leader id might not exist at LHS, therefore unionize with beacons.hash=$clusterid (!)
461 0         0 ($sth, $sthexpl) = $self->stmtHdl(<<"XxX");
462             SELECT beacons.hash, beacons.altid, beacons.seqno, beacons.hits, beacons.info, beacons.link,
463             repos.$tfield, repos.$afield, repos.$mfield, repos.$m1field, repos.$msfield, repos.$dfield, repos.$nfield, repos.$ifield,
464             repos.sort, repos.alias
465             FROM beacons NATURAL LEFT JOIN repos
466             WHERE ( (beacons.hash=?)
467             OR (beacons.hash IN (SELECT cluster.beacons.hash FROM cluster.beacons WHERE cluster.beacons.altid=?)) )
468             ORDER BY repos.sort, repos.alias;
469             XxX
470 0 0       0 $self->stmtExplain($sthexpl, $clusterid, $clusterid) if $ENV{'DBI_PROFILE'};
471 0 0       0 $sth->execute($clusterid, $clusterid) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr);
472             }
473             else { # simple query
474 11         35 ($sth, $sthexpl) = $self->stmtHdl(<<"XxX");
475             SELECT beacons.hash, beacons.altid, beacons.seqno, beacons.hits, beacons.info, beacons.link,
476             repos.$tfield, repos.$afield, repos.$mfield, repos.$m1field, repos.$msfield, repos.$dfield, repos.$nfield, repos.$ifield,
477             repos.sort, repos.alias
478             FROM beacons NATURAL LEFT JOIN repos
479             WHERE beacons.hash=?
480             ORDER BY repos.sort, repos.alias;
481             XxX
482 11 50       23 $self->stmtExplain($sthexpl, $hash) if $ENV{'DBI_PROFILE'};
483 11 50       635 $sth->execute($hash) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr);
484             }
485              
486 11   50     43 my $c = $self->{identifierClass} || undef;
487 11         7 my %didalready;
488 11         91 while ( my $onerow = $sth->fetchrow_arrayref() ) {
489             # last unless defined $onerow->[0]; # strange end condition
490 13 100 66     172 next if $onerow->[15] && exists $self->{'aliasfilter'}->{$onerow->[15]};
491              
492 11         9 my $hits = $onerow->[3];
493              
494 11         10 my $h = $onerow->[0];
495 11         7 my $p;
496 11 50 0     18 if ( $h eq $hash ) {
    0          
497 11         9 $p = $pretty}
498             elsif ( $clusterid && ref($c) ) {
499 0         0 $c->value("");
500 0   0     0 my $did = $c->hash($h) || $c->value($h) || $h;
501 0 0       0 $p = $c->can("pretty") ? $c->pretty() : $c->value();
502             };
503 11 0       41 $p = ($clusterid ? $h : $pretty) unless defined $p;
    50          
504              
505 11         6 my $uri;
506 11 100 66     36 if ( $uri = $onerow->[5] ) { # Expliziter Link
    100          
    50          
    0          
507             }
508             elsif ( $onerow->[1] && $onerow->[7] ) { # Konkordanzformat
509 4         8 $uri = sprintf($onerow->[7], $p, urlpseudoescape($onerow->[1]))}
510             elsif ( $onerow->[6] ) { # normales Beacon-Format
511 5         17 $uri = sprintf($onerow->[6], $p)}
512             elsif ( $onerow->[7] ) { # Neues Format
513 0         0 $uri = sprintf($onerow->[7], $p, urlpseudoescape($p))};
514 11 50       14 next unless $uri;
515              
516             # MESSAGE || NAME || INSTITUTION || DESCRIPTION
517 11   50     57 my $label = $onerow->[8] || $onerow->[12] || $onerow->[13] || $onerow->[11] || "???";
518 11 50       19 if ( $hits == 1 ) {
    50          
    0          
519 0 0       0 $label = $onerow->[9] if $onerow->[9]}
520             elsif ( $hits == 0 ) {
521 11 50       14 $label = $onerow->[10] if $onerow->[10]}
522             elsif ( $hits ) {
523 0 0       0 ($label .= " (%s)") unless ($label =~ /(^|[^%])%s/)};
524              
525             {
526 13     13   69 no warnings 'redundant';
  13         26  
  13         22861  
  11         7  
527 11         15 $label = sprintf($label, $hits);
528             }
529 11 100       16 $onerow->[4] = "" unless defined $onerow->[4];
530              
531             # my $description = $hits; # entsprechend opensearchsuggestions: pleonastisch, langweilig
532             # my $description = $onerow->[12] || $onerow->[13] || $onerow->[8] || $onerow->[10] || $onerow->[5]; # NAME or INSTITUTION or SOMEMESSAGE or MESSAGE
533             # DESCRIPTION || INSTITUTION || NAME || SOMEMESSAGE || MESSAGE || alias
534 11   50     36 my $description = $onerow->[11] || $onerow->[13] || $onerow->[12] || $onerow->[10] || $onerow->[8] || $onerow->[15] || ""; # INSTITUTION or NAME or SOMEMESSAGE or MESSAGE
535              
536             # Anreicherungen
537 11 100 66     30 if ( ($onerow->[4] =~ /\d{2}/) and ($onerow->[4] !~ /[a-wyz]/) ) {
538 1         3 $description .= " [".$onerow->[4]."]"} # add info
539             else {
540             # $onerow->[1] = "" unless defined $onerow->[1];
541 10 100       18 $label .= " [".$onerow->[4]."]" if $onerow->[4]; # add info
542 10 100       23 $description .= " [".$onerow->[1]."]" if $onerow->[1]; # Add target identifier
543             };
544              
545 11 50       65 $response->add($label, $description, $uri) unless $didalready{join("\x7f", $label, $description, $uri)}++;
546             }
547              
548 11         160 return $response;
549             }
550              
551             sub prepare_query {
552 11     11 0 8 my ($self, $query) = @_;
553 11         7 my ($hash, $pretty, $canon);
554             # search by: $hash
555             # forward by: $pretty
556             # normalize by: $canon
557 11         15 my $c = $self->{identifierClass};
558 11 50       20 if ( defined $c ) { # cast!
    50          
559 0 0       0 my $qval = ref($query) ? $query->as_string : $query;
560 0         0 $c->value($qval);
561 0         0 $hash = $c->hash();
562 0 0       0 $pretty = $c->can("pretty") ? $c->pretty() : $c->value();
563 0 0       0 $canon = $c->can("canonical") ? $c->canonical() : $c->value();
564             }
565             elsif ( ref($query) ) {
566 0         0 $hash = $query->hash();
567 0 0       0 $pretty = $query->can("pretty") ? $query->pretty() : $query->value();
568 0 0       0 $canon = $query->can("canonical") ? $query->canonical() : $query->value();
569             }
570             else {
571 11         12 $hash = $pretty = $canon = $query};
572              
573 11         18 return ($hash, $pretty, $canon);
574             }
575              
576              
577             ###
578              
579             =head2 Auxiliary Methods
580              
581             Sequence numbers (Seqnos) are primary keys to the database table where
582             each row contains the meta fields of one BEACON file
583              
584              
585             =head3 Seqnos ( $colname , $query )
586              
587             Return Seqnos from querying the table with all beacon headers in
588             column (field name) $colname for a $query
589             (which may contain SQL placeholders '%').
590              
591             =cut
592              
593             sub Seqnos {
594 10     10 1 2886 my ($self, $colname, $query) = @_;
595              
596 10   50     26 $colname ||= "";
597 10   50     23 $query ||= "";
598              
599 10         18 my $constraint = "";
600 10 50       33 if ( $query ) {
601 10         14 my $dbcolname = "";
602 10 100       65 if ( $colname =~ /^_(\w+)$/ ) {
    50          
603 9         25 $dbcolname = $1}
604             elsif ( $dbcolname = $self->beaconfields($colname) ) {}
605             else {
606 0         0 croak("column name '$colname' not known. Aborting")};
607              
608 10 100       45 $constraint = ($query =~ /%/) ? "WHERE $dbcolname LIKE ?"
609             : "WHERE $dbcolname=?";
610             };
611              
612 10         52 my $sth = $self->stmtHdl(<<"XxX");
613             SELECT seqno FROM repos $constraint ORDER BY seqno;
614             XxX
615             my $aryref = $self->{dbh}->selectcol_arrayref($sth, {Columns=>[1]}, ($query ? ($query) : ()))
616 10 50       111 or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr);
    50          
617 10 50       1158 return $aryref ? (@$aryref) : ();
618             }
619              
620              
621             =head3 RepoCols ( [ $colname [, $seqno_or_alias ]] )
622              
623             Return a hashref indexed by seqence number of all values of column (header field) $colname [alias]
624             optionally constrained by a SeqNo or Alias.
625              
626             Default for $colname is '_alias'.
627              
628             =cut
629              
630              
631             sub RepoCols {
632 10     10 1 4423 my ($self, $colname, $seqno_or_alias) = @_;
633 10   100     29 $colname ||= "_alias";
634 10   100     39 $seqno_or_alias ||= "";
635              
636 10         11 my $dbcolname = "";
637 10 100       52 if ( $colname =~ /^_(\w+)$/ ) {
    50          
638 1         2 $dbcolname = $1}
639             elsif ( $dbcolname = $self->beaconfields($colname) ) {}
640             else {
641 0         0 croak("column name '$colname' not known. Aborting")};
642              
643 10         22 my ($constraint, @cval) = mkConstraint($seqno_or_alias);
644 10         37 my $sth = $self->stmtHdl(<<"XxX");
645             SELECT seqno, $dbcolname FROM repos $constraint ORDER BY alias;
646             XxX
647             my $aryref = $self->{dbh}->selectcol_arrayref($sth, {Columns=>[1..2]}, @cval)
648 10 50       67 or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr);
649 10 50       590 if ( $aryref ) {
650 10         59 my %hash = @$aryref;
651 10         66 return \%hash;
652             };
653 0         0 return undef;
654             }
655              
656             sub mkConstraint {
657 39     39 0 118 local ($_) = @_;
658 39 100       141 return ("", ()) unless defined $_;
659 31 100       365 if ( /^%*$/ ) { return ("", ()) }
  9 100       22  
    50          
    50          
660 5         23 elsif ( /^\d+$/ ) { return (" WHERE seqno=?", $_) }
661 0         0 elsif ( /%/ ) { return (" WHERE alias LIKE ?", $_) }
662 17         67 elsif ( $_ ) { return (" WHERE alias=?", $_) }
663 0         0 else { return ("", ()) };
664             }
665              
666             =head3 OSDValues ( [ $key ] )
667              
668             Returns a hashref containing the OpenSearchDescription keywords and their
669             respective values.
670              
671             =cut
672              
673             sub OSDValues {
674 2     2 1 1837 my ($self, $key) = @_;
675 2   50     10 $key ||= "";
676              
677 2         2 my $constraint = "";
678 2 50       8 if ( $key =~ /%/ ) {
    50          
679 0         0 $constraint = " WHERE (key LIKE ?)"}
680             elsif ( $key ) {
681 0         0 $constraint = " WHERE (key=?)"};
682              
683 2         9 my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX");
684             SELECT key, val FROM osd $constraint;
685             XxX
686 2 0       6 $self->stmtExplain($sthexpl, ($key ? ($key) : ())) if $ENV{'DBI_PROFILE'};
    50          
687 2 50       100 $sth->execute(($key ? ($key) : ())) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr);
    50          
688              
689 2         6 my %result = ();
690 2         23 while ( my $aryref = $sth->fetchrow_arrayref ) {
691 7         7 my ($key, $val) = @$aryref;
692             # last unless defined $key; # undef on first call if nothing to be delivered?
693 7 50       14 next if $key =~ /^bc/; # BeaconMeta Fields smuggled in
694 7 100       16 if ( exists $result{$key} ) {
    50          
695 6 100       9 if ( ref($result{$key}) ) {
696 5         6 push(@{$result{$key}}, $val)}
  5         26  
697             else {
698 1         6 $result{$key} = [$result{$key}, $val]};
699             }
700             elsif ( $key eq "DateModified" ) {
701 0         0 $result{$key} = tToISO($val)}
702             else {
703 1         7 $result{$key} = $val};
704             };
705 2 100       6 return undef unless %result;
706 1         3 return \%result;
707             }
708              
709             =head3 admhash ( )
710              
711             Returns a hashref with the contents of the admin table (readonly, not tied).
712              
713             =cut
714              
715             sub admhash {
716 56     56 1 12128 my $self = shift;
717              
718             my ($admh, $admexpl) = $self->stmtHdl("SELECT key, val FROM admin;")
719 56 50       144 or croak("Could not prepare statement (dump admin table)".$self->{dbh}->errstr);
720 56 50       132 $self->stmtExplain($admexpl) if $ENV{'DBI_PROFILE'};
721 56 50       2850 $admh->execute() or croak("Could not execute statement (dump admin table): ".$admh->errstr);
722 56         140 my %adm = ();
723 56         464 while ( my $onerow = $admh->fetchrow_arrayref() ) {
724 158 50       463 if ( $admh->err ) {
725 0         0 croak("Could not iterate through admin table: ".$admh->errstr)};
726 158         232 my ($key, $val) = @$onerow;
727 158 50       1166 $adm{$key} = (defined $val) ? $val : "";
728             };
729 56         206 return \%adm;
730             }
731              
732              
733             =head3 autoIdentifier ()
734              
735             Initializes a missing C from the IDENTIFIER_CLASS entry in the admin table.
736              
737             =cut
738              
739             sub autoIdentifier {
740 30     30 1 4486 my ($self) = @_;
741              
742 30 100 66     139 return $self->{identifierClass} if exists $self->{identifierClass} && ref($self->{identifierClass});
743              
744             my ($admich, $admichexpl) = $self->stmtHdl("SELECT key, val FROM admin WHERE key=?;")
745 22 50       60 or croak("Could not prepare statement (dump admin table)".$self->{dbh}->errstr);
746 22 50       63 $self->stmtExplain($admichexpl, 'IDENTIFIER_CLASS') if $ENV{'DBI_PROFILE'};
747 22 50       1230 $admich->execute('IDENTIFIER_CLASS') or croak("Could not execute statement (IDENTIFIER_CLASS from admin table): ".$admich->errstr);
748 22         63 my %adm = ();
749 22         145 while ( my $onerow = $admich->fetchrow_arrayref() ) {
750 2 50       10 if ( $admich->err ) {
751 0         0 croak("Could not iterate through admin table: ".$admich->errstr)};
752 2         5 my ($key, $val) = @$onerow;
753 2   50     17 $adm{$key} = $val || "";
754             };
755              
756 22 100       66 if ( my $package = $adm{"IDENTIFIER_CLASS"} ) {
757 2         4 eval { $self->{identifierClass} = $package->new() };
  2         13  
758 2 50       48 return $self->{identifierClass} unless $@;
759              
760 0         0 eval {
761 0         0 (my $pkgpath = $package) =~ s=::=/=g; # require needs path...
762 0         0 require "$pkgpath.pm";
763 0         0 import $package;
764             };
765 0 0       0 if ( $@ ) {
766 0         0 croak "sorry: Identifier Class $package cannot be imported\n$@"};
767              
768 0         0 return $self->{identifierClass} = $package->new();
769             };
770 20         61 return undef;
771             }
772              
773              
774             =head3 findExample ( $goal, $offset, [ $sth ])
775              
776             Returns a hashref
777              
778             { id => identier,
779             response => Number of beacon files matching "/" Sum of individual hit counts
780             }
781              
782             for the C<$offset>'th identifier occuring in at least C<$goal> beacon instances.
783              
784             $sth will be initialized by a statement handle to pass to subsequent calls if
785             defined but false.
786              
787             =cut
788              
789             sub findExample {
790 4     4 1 4701 my ($self, $goal, $offset, $sth) = @_;
791 4         4 my $sthexpl;
792 4 100       9 unless ( $sth ) {
793 2         8 ($sth, $sthexpl) = $self->stmtHdl(<<"XxX");
794             SELECT hash, COUNT(*), SUM(hits) FROM beacons GROUP BY hash HAVING COUNT(*)>=? LIMIT 1 OFFSET ?;
795             XxX
796             #
797 2 100       7 $_[3] = $sth if defined $_[3];
798             };
799 4   100     13 $offset ||= 0;
800 4         32 $sth->bind_param(1, $goal, SQL_INTEGER);
801 4         9 $sth->bind_param(2, $offset, SQL_INTEGER);
802 4 0 33     9 if ( $sthexpl && $ENV{'DBI_PROFILE'} ) {
803 0         0 $sthexpl->[0]->bind_param(1, $goal, SQL_INTEGER);
804 0         0 $sthexpl->[0]->bind_param(2, $offset, SQL_INTEGER);
805 0         0 $self->stmtExplain($sthexpl);
806             };
807 4 50       285 $sth->execute() or croak("Could not execute canned sql (findExample): ".$sth->errstr);
808 4 100       49 if ( my $onerow = $sth->fetchrow_arrayref ) {
809 2 50       6 if ( defined $self->{identifierClass} ) {
810 0         0 my $c = $self->{identifierClass};
811             # compat: hash might not take an argument, must resort to value, has to be cleared before...
812 0         0 $c->value("");
813 0   0     0 my $did = $c->hash($onerow->[0]) || $c->value($onerow->[0]);
814 0 0       0 my $expanded = $c->can("pretty") ? $c->pretty() : $c->value();
815 0         0 return {id=>$expanded, response=>"$onerow->[1]/$onerow->[2]"};
816             }
817             else {
818 2         14 return {id=>$onerow->[0], response=>"$onerow->[1]/$onerow->[2]"}};
819             };
820 2         6 return undef;
821             };
822              
823             # Date prettyprint
824              
825             sub tToISO {
826 24   50 24 0 73 local($_) = HTTP::Date::time2isoz($_[0] || 0);
827 24         257 tr[ ][T];
828 24         66 return $_;
829             }
830              
831             # URL-encode data
832             sub urlpseudoescape { # we don't do a thorough job here, because it is not clear whether
833             # /a/b/c is a parameter ("/" must be encoded) or part of a path ("/" must not be encoded)
834             # and we must avoid URL-escaping already escaped content
835             # Therefore we only escape spaces and characters > 127
836 4     4 0 6 local ($_) = @_;
837             # $_ = pack("C0a*", $_); # Zeichen in Bytes zwingen
838 4         7 utf8::encode($_); # Zeichen in Bytes zwingen
839             # FYI
840             # reserved uri characters: [;/?:@&=+$,] by RFC 3986
841             # ;=%3B /=%2F ?=%3F :=%3A @=%40 &=%26 ==%3D +=%2B $=%24 ,=%2C
842             # delims = [<>#%"], unwise = [{}|\\\^\[\]`]
843             # mark (nreserved) = [-_.!~*'()]
844             # 222222257
845             # 1789ACEFE
846             # s/([^a-zA-Z0-9!'()*\-._~])/sprintf("%%%02X",ord($1))/eg;
847 4         8 s/([^\x21-\x7e])/sprintf("%%%02X",ord($1))/eg;
  4         12  
848 4         13 return $_;
849             }
850              
851              
852             # SQL handle management
853             sub stmtHdl {
854 340     340 0 518 my ($self, $sql, $errtext) = @_;
855 340   66     1129 $errtext ||= $sql;
856 340 50       720 my $if_active = $ENV{'DBI_PROFILE'} ? 0 : 1;
857             my $sth = $self->{dbh}->prepare_cached($sql, {}, $if_active)
858 340 50       2178 or croak("Could not prepare $errtext: ".$self->{dbh}->errstr);
859 340 100       26153 return $sth unless wantarray;
860 302 50       521 if ( $ENV{'DBI_PROFILE'} ) {
861 0         0 my @callerinfo = caller;
862 0 0       0 print STDERR "reusing handle for $sql (@callerinfo)===\n" if $sth->{Executed};
863             my $esth = $self->{dbh}->prepare_cached("EXPLAIN QUERY PLAN $sql", {}, 0)
864 0 0       0 or croak("Could not prepare explain query plan stmt: ".$self->{dbh}->errstr);
865 0         0 return $sth, [$esth, $sql];
866             }
867             else {
868 302         846 return $sth, undef};
869             };
870              
871             sub stmtExplain {
872 0     0 0   my ($self, $eref, @args) = @_;
873 0           my $esql = $eref->[1];
874 0           my @callerinfo = caller;
875 0           print STDERR "explain $esql\n\tfor data @args\n(@callerinfo)===\n";
876 0           my $esth = $eref->[0];
877 0 0         $esth->execute(@args) or croak("cannot execute explain statement $esql with args @args");
878 0           local $" = " | ";
879 0           while ( my $rowref = $esth->fetchrow_arrayref ) {
880 0           print STDERR "@$rowref\n";
881             }
882 0           print STDERR "===\n";
883             }
884              
885              
886             =head1 BUGS
887              
888              
889              
890             =head1 SUPPORT
891              
892             Send mail to the author
893              
894             =head1 AUTHOR
895              
896             Thomas Berger
897              
898             =head1 COPYRIGHT
899              
900             This program is free software; you can redistribute
901             it and/or modify it under the same terms as Perl itself.
902              
903             The full text of the license can be found in the
904             LICENSE file included with this module.
905              
906              
907             =head1 SEE ALSO
908              
909             perl(1).
910              
911             =cut
912              
913             #################### main pod documentation end ###################
914              
915             1;
916