File Coverage

blib/lib/SeeAlso/Source/BeaconAggregator.pm
Criterion Covered Total %
statement 21 23 91.3
branch n/a
condition n/a
subroutine 7 7 100.0
pod n/a
total 28 30 93.3


line stmt bran cond sub pod time code
1             package SeeAlso::Source::BeaconAggregator;
2 13     13   38083 use strict;
  13         22  
  13         411  
3 13     13   53 use warnings;
  13         13  
  13         351  
4              
5             BEGIN {
6 13     13   53 use Exporter ();
  13         77  
  13         255  
7 13     13   479 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  13         14  
  13         1226  
8 13     13   21 $VERSION = '0.2_88';
9 13         135 @ISA = qw(Exporter);
10             #Give a hoot don't pollute, do not export more than needed by default
11 13         54 @EXPORT = qw();
12 13         17 @EXPORT_OK = qw();
13 13         286 %EXPORT_TAGS = ();
14             }
15              
16 13     13   73 use vars qw($DATA_VERSION);
  13         36  
  13         556  
17             $DATA_VERSION = 2;
18              
19 13     13   3081 use SeeAlso::Response;
  0            
  0            
20             use base ("SeeAlso::Source");
21              
22             use DBI qw(:sql_types);
23             use HTTP::Date;
24              
25             use CGI;
26             use Carp;
27              
28             #################### main pod documentation begin ###################
29             ## Below is the stub of documentation for your module.
30             ## You better edit it!
31              
32             =head1 NAME
33              
34             SeeAlso::Source::BeaconAggregator - Beacon files as source for SeeAlso::Server
35              
36             =head1 SYNOPSIS
37              
38             use CGI;
39             use SeeAlso::Identifier::ISSN;
40             use SeeAlso::Server;
41             use SeeAlso::Source::BeaconAggregator;
42              
43             my $srcdescription = {
44             "ShortName" => "TestService", # 16 Characters
45             "LongName" => "Sample SeeAlso Beacon Aggregator", # 48 characters
46             # "Description" => "The following services are contained: ...", # 1024 Characters
47             "DateModfied" => "...",
48             _dont_advertise => 1,
49             };
50              
51             my $CGI = CGI->new(); binmode(STDOUT, ":utf8");
52              
53             my $source = SeeAlso::Source::BeaconAggregator->new(
54             'file' => "/path/to/existing/database",
55             'identifierClass' => SeeAlso::Identifier::ISSN->new(),
56             'verbose' => 1,
57             'description' => $srcdescription,
58             );
59              
60             my $server = SeeAlso::Server->new (
61             'cgi' => $CGI,
62             xslt => "/client/showservice.xsl", # => +
63             clientbase => "/client/", # =>
64             expires => "+2d",
65             );
66              
67             my $rawid = $CGI->param('id') || "";
68             my $identifier = $rawid ? SeeAlso::Identifier::ISSN->new($rawid) : "";
69             my $result = $server->query($source, $identifier ? $identifier->value() : undef);
70             print $result;
71              
72              
73             =head1 DESCRIPTION
74              
75             This Module allows a collection of BEACON files (cf. http://de.wikipedia.org/wiki/Wikipedia:BEACON)
76             to be used as SeeAlso::Source (probably in the context of an SeeAlso::Server application).
77             Therefore it implements the four methods documented in SeeAlso::Source
78              
79             The BEACON files (lists of non-local identifiers of a certain type documenting the coverage of a given
80             online database plus means for access) are imported by the methods provided by
81             SeeAlso::Source::BeaconAggregator::Maintenance.pm, usually by employing the script sasbactrl.pl
82             as command line client.
83              
84             Serving other formats than SeeAlso or providing a BEACON file with respect to this
85             SeeAlso service is achieved by using SeeAlso::Source::BeaconAggregator::Publisher.
86              
87              
88             =head1 USAGE
89              
90              
91             =head2 Class methods
92              
93             =cut
94              
95             our %BeaconFields = ( # in den BEACON-Formaten definierte Felder
96             FORMAT => ['VARCHAR(16)', 1], # Pflicht
97             TARGET => ['VARCHAR(1024)', 1], # Pflicht, enthaelt {ID}
98             # PND-BEACON
99             VERSION => ['VARCHAR(16)'], # Only V0.1 supported
100             FEED => ['VARCHAR(255)'],
101             CONTACT => ['VARCHAR(63)'],
102             INSTITUTION => ['VARCHAR(1024)'],
103             ISIL => ['VARCHAR(64)'],
104             DESCRIPTION => ['VARCHAR(2048)'],
105             UPDATE => ['VARCHAR(63)'],
106             TIMESTAMP => ['INTEGER'],
107             REVISIT => ['INTEGER'],
108             # BEACON
109             EXAMPLES => ['VARCHAR(255)'],
110             MESSAGE => ['VARCHAR(255)'], # enthaelt {hits}
111             ONEMESSAGE => ['VARCHAR(255)'],
112             SOMEMESSAGE => ['VARCHAR(255)'],
113             PREFIX => ['VARCHAR(255)'],
114             # NEWER
115             COUNT => ['VARCHAR(255)'],
116             REMARK => ['VARCHAR(2048)'],
117             # WInofficial
118             NAME => ['VARCHAR(255)'],
119             # Experimental
120             ALTTARGET => ['VARCHAR(1024)'],
121             IMGTARGET => ['VARCHAR(1024)'],
122             );
123              
124              
125              
126             =head3 beaconfields ( [ $what ] )
127              
128             (Class method) Called without parameter returns an array of all valid field names
129             for meta headers
130              
131             @meta_supported = SeeAlso::Source::BeaconAggregator->beaconfields();
132              
133             With given parameter $what in scalar context returns the column
134             name of the database for the abstract field name. In array context
135             additionally the column type and optional flag designating a
136             mandatory entry are returned.
137              
138             $internal_col = SeeAlso::Source::BeaconAggregator->beaconfields('FORMAT');
139              
140             ($internal_col, $specs, $mandatory)
141             = SeeAlso::Source::BeaconAggregator->beaconfields('FORMAT');
142              
143             Fields are:
144              
145             # mandatory
146             FORMAT, TARGET
147             # as of BEACON spec
148             VERSION, FEED, TIMESTAMP, REVISIT, UPDATE
149             CONTACT, INSTITUTION, ISIL,
150             # from the experimental BEACON spec
151             MESSAGE, ONEMESSAGE, SOMEMESSAGE
152             PREFIX, EXAMPLES
153             # later additions
154             COUNT, REMARK
155             # current practise
156             NAME
157             # experimental extension "Konkordanzformat"
158             ALTTARGET, IMGTARGET
159              
160              
161             =cut
162              
163             sub beaconfields {
164             my ($class, $what) = @_;
165             return keys %BeaconFields unless $what;
166             return undef unless $BeaconFields{$what};
167             return wantarray ? ("bc$what", @{$BeaconFields{$what}}) : "bc$what";
168             }
169              
170              
171             our %OSDElements = ( # fuer OpensearchDescription deklarierte Felder
172             "ShortName" => "*", # <= 16 Zeichen, PFLICHT!
173             "Description" => "*", # <= 1024 Zeichen, PFLICHT!
174              
175             "Contact" => "*", # "nackte" Mailadresse user@domain, optional.
176             "Tags" => "*", # Liste von Einzelworten, <= 256 Zeichen, optional.
177             "LongName" => "*", # <= 48 Zeichen, optional.
178             "Developer" => "*", # <= 64 Zeichen, optional.
179             "Attribution" => "*", # <= 256 Zeichen, optional.
180             "SyndicationRight" => "open", # open, limited, private, closed
181             "AdultContent" => "false", # false/no/0: false, sonst: true
182              
183             "Language" => "*",
184             "InputEncoding" => "UTF-8",
185             "OutputEncoding" => "UTF-8",
186             # "dcterms:modified" => "",
187             # repeatable fields w/o contents, treated specially
188             # "Url" => {type => "*", template => "*"},
189             # "Query" => {role => "example", searchTerms => "*"},
190             # Special for the SeeAlso::Family
191             "Example" => "*",
192             "Examples" => "*",
193             "BaseURL" => "*", # Auto
194             "DateModified" => "*", # alias for dcterms:modified
195             "Source" => "*",
196             );
197              
198              
199             =head3 osdKeys ( [ $what ] )
200              
201             (Class method) Called without parameter returns an array of all valid element names
202             for the OpenSearchDescription:
203              
204             @meta_names = SeeAlso::Source::BeaconAggregator->osdKeys();
205              
206             With given parameter $what returns the value for the given OpenSearchDescription
207             element:
208              
209             $osd_value = SeeAlso::Source::BeaconAggregator->beaconfields('LongName');
210              
211             OSD elements are
212              
213             ShortName, Description
214             Contact, Tags, LongName, Developer, Attribution, SyndicationRight, AdultContent
215             Language, InputEncoding, OutputEncoding
216             # special for SeeAlso::Family
217             Example, Examples, BaseURL, DateModified, Source
218              
219             =cut
220              
221             sub osdKeys {
222             my ($class, $what) = @_;
223             return keys %OSDElements unless $what;
224             return undef unless $OSDElements{$what};
225             return $OSDElements{$what};
226             }
227              
228              
229             =head2 SeeAlso::Source methods
230              
231             =head3 new( %accessor [, %options ] )
232              
233             Creates the SeeAlso::Source::BeaconAggregator object and connects to an existing
234             database previously created with the methods from
235             SeeAlso::Source::BeaconAggregator::Maintenance (currently SQLlite)
236              
237             Accessor options:
238              
239             =over 8
240              
241             =item dbh
242              
243             handle of a database already connected to
244              
245             =item dbroot
246              
247             optional path to prepend to dsn or file
248              
249             =item dsn
250              
251             directory name (directory contains the database file "-db"
252              
253             =item file
254              
255             full path of the database
256              
257             =back
258              
259             Other options:
260              
261             =over 8
262              
263             =item identifierClass
264              
265             contains an already instantiated object of that class
266              
267             =item verbose (0|1)
268              
269             =item description
270              
271             Hashref with options to be piped through to SeeAlso::Source
272              
273             =item aliasfilter
274              
275             Hashref with aliases to be filtered out from query results
276              
277             =item cluster
278              
279             dsn of a beacon source of identical identifier type giving a mapping (hash / altid)
280             e.g. invalidated identifiers -> current identifiers.
281              
282             When the identifier supplied for query() is mentioned in this table, the query will be
283             executed against the associated current identifier and all invalidated ones
284             (backward translation of forward translation).
285              
286             When not (the mapping might not necessarily include the identiy mapping),
287             the query behaves as if no "cluster" was given.
288              
289             For translation between different identifier schemes before querying,
290             use an appropriate SeeAlso::Identifier class.
291              
292              
293             =back
294              
295             Returns undef if unable to DBI->connect() to the database.
296              
297             =cut
298              
299             sub new {
300             my ($class, %options) = @_;
301             my $self = {%options};
302             bless($self, $class);
303              
304             if ( $self->{dsn} ) {
305             croak("no special characters allowed for dsn") unless $self->{dsn} =~ /^[\w!,.{}-]+$/};
306              
307             if ( $self->{dbroot} ) {
308             return undef unless -d $self->{dbroot};
309             $self->{dbroot} .= "/" unless $self->{dbroot} =~ m!/$!;
310             };
311              
312             my $dbfile;
313             if ( $self->{dbh} ) { # called with handle...
314             return $self;
315             }
316             elsif ( $self->{dsn} ) {
317             $dbfile = $self->{dsn}."/".$self->{dsn}."-db";
318             (substr($dbfile, 0, 0) = $self->{dbroot}) if $self->{dbroot};
319             }
320             elsif ( $dbfile = $self->{file} ) {
321             if ( $self->{dbroot} ) {
322             substr($dbfile, 0, 0) = $self->{dbroot}};
323             };
324              
325             return undef unless $dbfile;
326              
327             my $dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", "", "",
328             {
329             # RaiseError => 1,
330             sqlite_unicode => 1,
331             });
332             return undef unless $dbh;
333             $self->{dbh} = $dbh;
334              
335             if ( $self->{cluster} ) {
336             my $clusterfile = $self->{cluster}."/".$self->{cluster}."-db";
337             (substr($clusterfile, 0, 0) = $self->{dbroot}) if $self->{dbroot};
338             $dbh->do("ATTACH DATABASE '$clusterfile' AS cluster") or croak("error attaching cluster database '$clusterfile'");
339             };
340              
341             return $self;
342             }
343              
344              
345             =head3 description ()
346              
347             Inherited from SeeAlso::Source.
348              
349             =cut
350              
351             sub description {
352             my $self = shift;
353             $self->enrichdescription() unless $self->{descriptioncached};
354             return $self->SUPER::description(@_);
355             }
356              
357             =head3 about ()
358              
359             Inherited from SeeAlso::Source.
360              
361             =cut
362              
363             sub about {
364             my $self = shift;
365             $self->enrichdescription() unless $self->{descriptioncached};
366             return $self->SUPER::about(@_);
367             }
368              
369              
370             sub enrichdescription {
371             my ($self) = @_;
372             my $rawref = $self->OSDValues();
373             my %result;
374             foreach ( keys %$rawref ) {
375             next unless $rawref->{$_};
376             if ( ref($rawref->{$_}) ) { # List
377             if ( $_ =~ /^Example/ ) {
378             my @ary;
379             foreach my $item ( @{$rawref->{$_}} ) {
380             next unless $item;
381             my($i, $r) = split(/\s*\|\s*/, $item, 2);
382             next unless $i;
383             if ( $r ) {
384             push(@ary, {'id'=>$i, 'response'=>$r})}
385             else {
386             push(@ary, {'id'=>$i})}
387             }
388             $result{$_} = \@ary if @ary;
389             }
390             else {
391             $result{$_} = join(";\n", @{$rawref->{$_}})};
392             }
393             else { # Scalar
394             if ( $_ =~ /^Example/ ) {
395             my($i, $r) = split(/\s*\|\s*/, $rawref->{$_}, 2);
396             next unless $i;
397             if ( $r ) {
398             $result{$_} = [{'id'=>$i, 'response'=>$r}]}
399             else {
400             $result{$_} = [{'id'=>$i}]}
401             }
402             else {
403             $result{$_} = $rawref->{$_}};
404             }
405             };
406              
407              
408             if ( $self->{description} ) {
409             my %combined = (%result, %{$self->{description}});
410             $self->{description} = \%combined;
411             }
412             elsif ( %result ) {
413             $self->{description} = \%result};
414              
415             $self->{descriptioncached} = 1;
416             }
417              
418             ### Antworten fuer Anfragen als Format seealso
419              
420             =head3 set_aliasfilter ( @aliaslist )
421              
422             Init the hash with
423              
424             =cut
425              
426             sub set_aliasfilter {
427             my ($self, @aliaslist) = @_;
428             $self->{'aliasfilter'} = { map { ($_, "") } @aliaslist };
429             return $self->{'aliasfilter'};
430             }
431              
432             =head3 query( [ $identifier] )
433              
434             Returns a SeeAlso::Response listing all matches to the given string or
435             SeeAlso::Identifier $identifier.
436              
437             =cut
438              
439             sub query { # SeeAlso-Simple response
440             my ($self, $query) = @_;
441             my ($hash, $pretty, $canon) = $self->prepare_query($query);
442             my $response = SeeAlso::Response->new($canon);
443              
444             my $clusterid;
445             if ( $self->{cluster} ) {
446             my ($clusterh, $clusterexpl) = $self->stmtHdl("SELECT beacons.altid FROM cluster.beacons WHERE beacons.hash=? OR beacons.altid=? LIMIT 1;");
447             $self->stmtExplain($clusterexpl, $hash, $hash) if $ENV{'DBI_PROFILE'};
448             $clusterh->execute($hash, $hash);
449             while ( my $onerow = $clusterh->fetchrow_arrayref() ) {
450             $clusterid = $onerow->[0];}
451             }
452              
453             my ( $tfield, $afield, $mfield, $m1field, $msfield, $dfield, $nfield, $ifield)
454             = map{ scalar $self->beaconfields($_) }
455             # 6 7 8 9 10 11 12 13
456             qw(TARGET ALTTARGET MESSAGE ONEMESSAGE SOMEMESSAGE DESCRIPTION NAME INSTITUTION);
457             # 0 1 2 3 4 5
458             # 14 15
459             my ($sth, $sthexpl);
460             if ( $clusterid ) { # query IN cluster (leader id might not exist at LHS, therefore unionize with beacons.hash=$clusterid (!)
461             ($sth, $sthexpl) = $self->stmtHdl(<<"XxX");
462             SELECT beacons.hash, beacons.altid, beacons.seqno, beacons.hits, beacons.info, beacons.link,
463             repos.$tfield, repos.$afield, repos.$mfield, repos.$m1field, repos.$msfield, repos.$dfield, repos.$nfield, repos.$ifield,
464             repos.sort, repos.alias
465             FROM beacons NATURAL LEFT JOIN repos
466             WHERE ( (beacons.hash=?)
467             OR (beacons.hash IN (SELECT cluster.beacons.hash FROM cluster.beacons WHERE cluster.beacons.altid=?)) )
468             ORDER BY repos.sort, repos.alias;
469             XxX
470             $self->stmtExplain($sthexpl, $clusterid, $clusterid) if $ENV{'DBI_PROFILE'};
471             $sth->execute($clusterid, $clusterid) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr);
472             }
473             else { # simple query
474             ($sth, $sthexpl) = $self->stmtHdl(<<"XxX");
475             SELECT beacons.hash, beacons.altid, beacons.seqno, beacons.hits, beacons.info, beacons.link,
476             repos.$tfield, repos.$afield, repos.$mfield, repos.$m1field, repos.$msfield, repos.$dfield, repos.$nfield, repos.$ifield,
477             repos.sort, repos.alias
478             FROM beacons NATURAL LEFT JOIN repos
479             WHERE beacons.hash=?
480             ORDER BY repos.sort, repos.alias;
481             XxX
482             $self->stmtExplain($sthexpl, $hash) if $ENV{'DBI_PROFILE'};
483             $sth->execute($hash) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr);
484             }
485              
486             my $c = $self->{identifierClass} || undef;
487             my %didalready;
488             while ( my $onerow = $sth->fetchrow_arrayref() ) {
489             # last unless defined $onerow->[0]; # strange end condition
490             next if $onerow->[15] && exists $self->{'aliasfilter'}->{$onerow->[15]};
491              
492             my $hits = $onerow->[3];
493              
494             my $h = $onerow->[0];
495             my $p;
496             if ( $h eq $hash ) {
497             $p = $pretty}
498             elsif ( $clusterid && ref($c) ) {
499             $c->value("");
500             my $did = $c->hash($h) || $c->value($h) || $h;
501             $p = $c->can("pretty") ? $c->pretty() : $c->value();
502             };
503             $p = ($clusterid ? $h : $pretty) unless defined $p;
504              
505             my $uri;
506             if ( $uri = $onerow->[5] ) { # Expliziter Link
507             }
508             elsif ( $onerow->[1] && $onerow->[7] ) { # Konkordanzformat
509             $uri = sprintf($onerow->[7], $p, urlpseudoescape($onerow->[1]))}
510             elsif ( $onerow->[6] ) { # normales Beacon-Format
511             $uri = sprintf($onerow->[6], $p)}
512             elsif ( $onerow->[7] ) { # Neues Format
513             $uri = sprintf($onerow->[7], $p, urlpseudoescape($p))};
514             next unless $uri;
515              
516             # MESSAGE || NAME || INSTITUTION || DESCRIPTION
517             my $label = $onerow->[8] || $onerow->[12] || $onerow->[13] || $onerow->[11] || "???";
518             if ( $hits == 1 ) {
519             $label = $onerow->[9] if $onerow->[9]}
520             elsif ( $hits == 0 ) {
521             $label = $onerow->[10] if $onerow->[10]}
522             elsif ( $hits ) {
523             ($label .= " (%s)") unless ($label =~ /(^|[^%])%s/)};
524              
525             $label = sprintf($label, $hits);
526             $onerow->[4] = "" unless defined $onerow->[4];
527             $label .= " [".$onerow->[4]."]" if ($onerow->[4] =~ /[a-wyz]/) or ($onerow->[4] && ($onerow->[4] !~ /\d/)); # add info
528              
529             # my $description = $hits; # entsprechend opensearchsuggestions: pleonastisch, langweilig
530             # my $description = $onerow->[12] || $onerow->[13] || $onerow->[8] || $onerow->[10] || $onerow->[5]; # NAME or INSTITUTION or SOMEMESSAGE or MESSAGE
531             # DESCRIPTION || INSTITUTION || NAME || SOMEMESSAGE || MESSAGE || alias
532             my $description = $onerow->[11] || $onerow->[13] || $onerow->[12] || $onerow->[10] || $onerow->[8] || $onerow->[15] || ""; # INSTITUTION or NAME or SOMEMESSAGE or MESSAGE
533             if ( ($onerow->[4] =~ /\d/) and ($onerow->[4] !~ /[a-wyz]/) ) {
534             $description .= " [".$onerow->[4]."]"} # add info
535             else {
536             # $onerow->[1] = "" unless defined $onerow->[1];
537             # $description .= " [".$onerow->[1]."]" if $onerow->[1]; # Add target identifier
538             };
539              
540             $response->add($label, $description, $uri) unless $didalready{join("\x7f", $label, $description, $uri)}++;
541             }
542              
543             return $response;
544             }
545              
546             sub prepare_query {
547             my ($self, $query) = @_;
548             my ($hash, $pretty, $canon);
549             # search by: $hash
550             # forward by: $pretty
551             # normalize by: $canon
552             my $c = $self->{identifierClass};
553             if ( defined $c ) { # cast!
554             my $qval = ref($query) ? $query->as_string : $query;
555             $c->value($qval);
556             $hash = $c->hash();
557             $pretty = $c->can("pretty") ? $c->pretty() : $c->value();
558             $canon = $c->can("canonical") ? $c->canonical() : $c->value();
559             }
560             elsif ( ref($query) ) {
561             $hash = $query->hash();
562             $pretty = $query->can("pretty") ? $query->pretty() : $query->value();
563             $canon = $query->can("canonical") ? $query->canonical() : $query->value();
564             }
565             else {
566             $hash = $pretty = $canon = $query};
567              
568             return ($hash, $pretty, $canon);
569             }
570              
571              
572             ###
573              
574             =head2 Auxiliary Methods
575              
576             Sequence numbers (Seqnos) are primary keys to the database table where
577             each row contains the meta fields of one BEACON file
578              
579              
580             =head3 Seqnos ( $colname , $query )
581              
582             Return Seqnos from querying the table with all beacon headers in
583             column (field name) $colname for a $query
584             (which may contain SQL placeholders '%').
585              
586             =cut
587              
588             sub Seqnos {
589             my ($self, $colname, $query) = @_;
590              
591             $colname ||= "";
592             $query ||= "";
593              
594             my $constraint = "";
595             if ( $query ) {
596             my $dbcolname = "";
597             if ( $colname =~ /^_(\w+)$/ ) {
598             $dbcolname = $1}
599             elsif ( $dbcolname = $self->beaconfields($colname) ) {}
600             else {
601             croak("column name '$colname' not known. Aborting")};
602              
603             $constraint = ($query =~ /%/) ? "WHERE $dbcolname LIKE ?"
604             : "WHERE $dbcolname=?";
605             };
606              
607             my $sth = $self->stmtHdl(<<"XxX");
608             SELECT seqno FROM repos $constraint ORDER BY seqno;
609             XxX
610             my $aryref = $self->{dbh}->selectcol_arrayref($sth, {Columns=>[1]}, ($query ? ($query) : ()))
611             or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr);
612             return $aryref ? (@$aryref) : ();
613             }
614              
615              
616             =head3 RepoCols ( [ $colname [, $seqno_or_alias ]] )
617              
618             Return a hashref indexed by seqence number of all values of column (header field) $colname [alias]
619             optionally constrained by a SeqNo or Alias.
620              
621             Default for $colname is '_alias'.
622              
623             =cut
624              
625              
626             sub RepoCols {
627             my ($self, $colname, $seqno_or_alias) = @_;
628             $colname ||= "_alias";
629             $seqno_or_alias ||= "";
630              
631             my $dbcolname = "";
632             if ( $colname =~ /^_(\w+)$/ ) {
633             $dbcolname = $1}
634             elsif ( $dbcolname = $self->beaconfields($colname) ) {}
635             else {
636             croak("column name '$colname' not known. Aborting")};
637              
638             my ($constraint, @cval) = mkConstraint($seqno_or_alias);
639             my $sth = $self->stmtHdl(<<"XxX");
640             SELECT seqno, $dbcolname FROM repos $constraint ORDER BY alias;
641             XxX
642             my $aryref = $self->{dbh}->selectcol_arrayref($sth, {Columns=>[1..2]}, @cval)
643             or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr);
644             if ( $aryref ) {
645             my %hash = @$aryref;
646             return \%hash;
647             };
648             return undef;
649             }
650              
651             sub mkConstraint {
652             local ($_) = @_;
653             return ("", ()) unless defined $_;
654             if ( /^%*$/ ) { return ("", ()) }
655             elsif ( /^\d+$/ ) { return (" WHERE seqno=?", $_) }
656             elsif ( /%/ ) { return (" WHERE alias LIKE ?", $_) }
657             elsif ( $_ ) { return (" WHERE alias=?", $_) }
658             else { return ("", ()) };
659             }
660              
661             =head3 OSDValues ( [ $key ] )
662              
663             Returns a hashref containing the OpenSearchDescription keywords and their
664             respective values.
665              
666             =cut
667              
668             sub OSDValues {
669             my ($self, $key) = @_;
670             $key ||= "";
671              
672             my $constraint = "";
673             if ( $key =~ /%/ ) {
674             $constraint = " WHERE (key LIKE ?)"}
675             elsif ( $key ) {
676             $constraint = " WHERE (key=?)"};
677              
678             my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX");
679             SELECT key, val FROM osd $constraint;
680             XxX
681             $self->stmtExplain($sthexpl, ($key ? ($key) : ())) if $ENV{'DBI_PROFILE'};
682             $sth->execute(($key ? ($key) : ())) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr);
683              
684             my %result = ();
685             while ( my $aryref = $sth->fetchrow_arrayref ) {
686             my ($key, $val) = @$aryref;
687             # last unless defined $key; # undef on first call if nothing to be delivered?
688             next if $key =~ /^bc/; # BeaconMeta Fields smuggled in
689             if ( exists $result{$key} ) {
690             if ( ref($result{$key}) ) {
691             push(@{$result{$key}}, $val)}
692             else {
693             $result{$key} = [$result{$key}, $val]};
694             }
695             elsif ( $key eq "DateModified" ) {
696             $result{$key} = tToISO($val)}
697             else {
698             $result{$key} = $val};
699             };
700             return undef unless %result;
701             return \%result;
702             }
703              
704             =head3 admhash ( )
705              
706             Returns a hashref with the contents of the admin table (readonly, not tied).
707              
708             =cut
709              
710             sub admhash {
711             my $self = shift;
712              
713             my ($admh, $admexpl) = $self->stmtHdl("SELECT key, val FROM admin;")
714             or croak("Could not prepare statement (dump admin table)".$self->{dbh}->errstr);
715             $self->stmtExplain($admexpl) if $ENV{'DBI_PROFILE'};
716             $admh->execute() or croak("Could not execute statement (dump admin table): ".$admh->errstr);
717             my %adm = ();
718             while ( my $onerow = $admh->fetchrow_arrayref() ) {
719             if ( $admh->err ) {
720             croak("Could not iterate through admin table: ".$admh->errstr)};
721             my ($key, $val) = @$onerow;
722             $adm{$key} = (defined $val) ? $val : "";
723             };
724             return \%adm;
725             }
726              
727              
728             =head3 autoIdentifier ()
729              
730             Initializes a missing C from the IDENTIFIER_CLASS entry in the admin table.
731              
732             =cut
733              
734             sub autoIdentifier {
735             my ($self) = @_;
736              
737             return $self->{identifierClass} if exists $self->{identifierClass} && ref($self->{identifierClass});
738              
739             my ($admich, $admichexpl) = $self->stmtHdl("SELECT key, val FROM admin WHERE key=?;")
740             or croak("Could not prepare statement (dump admin table)".$self->{dbh}->errstr);
741             $self->stmtExplain($admichexpl, 'IDENTIFIER_CLASS') if $ENV{'DBI_PROFILE'};
742             $admich->execute('IDENTIFIER_CLASS') or croak("Could not execute statement (IDENTIFIER_CLASS from admin table): ".$admich->errstr);
743             my %adm = ();
744             while ( my $onerow = $admich->fetchrow_arrayref() ) {
745             if ( $admich->err ) {
746             croak("Could not iterate through admin table: ".$admich->errstr)};
747             my ($key, $val) = @$onerow;
748             $adm{$key} = $val || "";
749             };
750              
751             if ( my $package = $adm{"IDENTIFIER_CLASS"} ) {
752             eval { $self->{identifierClass} = $package->new() };
753             return $self->{identifierClass} unless $@;
754              
755             eval {
756             (my $pkgpath = $package) =~ s=::=/=g; # require needs path...
757             require "$pkgpath.pm";
758             import $package;
759             };
760             if ( $@ ) {
761             croak "sorry: Identifier Class $package cannot be imported\n$@"};
762              
763             return $self->{identifierClass} = $package->new();
764             };
765             return undef;
766             }
767              
768              
769             =head3 findExample ( $goal, $offset, [ $sth ])
770              
771             Returns a hashref
772              
773             { id => identier,
774             response => Number of beacon files matching "/" Sum of individual hit counts
775             }
776              
777             for the C<$offset>'th identifier occuring in at least C<$goal> beacon instances.
778              
779             $sth will be initialized by a statement handle to pass to subsequent calls if
780             defined but false.
781              
782             =cut
783              
784             sub findExample {
785             my ($self, $goal, $offset, $sth) = @_;
786             my $sthexpl;
787             unless ( $sth ) {
788             ($sth, $sthexpl) = $self->stmtHdl(<<"XxX");
789             SELECT hash, COUNT(*), SUM(hits) FROM beacons GROUP BY hash HAVING COUNT(*)>=? LIMIT 1 OFFSET ?;
790             XxX
791             #
792             $_[3] = $sth if defined $_[3];
793             };
794             $offset ||= 0;
795             $sth->bind_param(1, $goal, SQL_INTEGER);
796             $sth->bind_param(2, $offset, SQL_INTEGER);
797             if ( $sthexpl && $ENV{'DBI_PROFILE'} ) {
798             $sthexpl->[0]->bind_param(1, $goal, SQL_INTEGER);
799             $sthexpl->[0]->bind_param(2, $offset, SQL_INTEGER);
800             $self->stmtExplain($sthexpl);
801             };
802             $sth->execute() or croak("Could not execute canned sql (findExample): ".$sth->errstr);
803             if ( my $onerow = $sth->fetchrow_arrayref ) {
804             if ( defined $self->{identifierClass} ) {
805             my $c = $self->{identifierClass};
806             # compat: hash might not take an argument, must resort to value, has to be cleared before...
807             $c->value("");
808             my $did = $c->hash($onerow->[0]) || $c->value($onerow->[0]);
809             my $expanded = $c->can("pretty") ? $c->pretty() : $c->value();
810             return {id=>$expanded, response=>"$onerow->[1]/$onerow->[2]"};
811             }
812             else {
813             return {id=>$onerow->[0], response=>"$onerow->[1]/$onerow->[2]"}};
814             };
815             return undef;
816             };
817              
818             # Date prettyprint
819              
820             sub tToISO {
821             local($_) = HTTP::Date::time2isoz($_[0] || 0);
822             tr[ ][T];
823             return $_;
824             }
825              
826             # URL-encode data
827             sub urlpseudoescape { # we don't do a thorough job here, because it is not clear whether
828             # /a/b/c is a parameter ("/" must be encoded) or part of a path ("/" must not be encoded)
829             # and we must avoid URL-escaping already escaped content
830             # Therefore we only escape spaces and characters > 127
831             local ($_) = @_;
832             # $_ = pack("C0a*", $_); # Zeichen in Bytes zwingen
833             utf8::encode($_); # Zeichen in Bytes zwingen
834             # FYI
835             # reserved uri characters: [;/?:@&=+$,] by RFC 3986
836             # ;=%3B /=%2F ?=%3F :=%3A @=%40 &=%26 ==%3D +=%2B $=%24 ,=%2C
837             # delims = [<>#%"], unwise = [{}|\\\^\[\]`]
838             # mark (nreserved) = [-_.!~*'()]
839             # 222222257
840             # 1789ACEFE
841             # s/([^a-zA-Z0-9!'()*\-._~])/sprintf("%%%02X",ord($1))/eg;
842             s/([^\x21-\x7e])/sprintf("%%%02X",ord($1))/eg;
843             return $_;
844             }
845              
846              
847             # SQL handle management
848             sub stmtHdl {
849             my ($self, $sql, $errtext) = @_;
850             $errtext ||= $sql;
851             my $if_active = $ENV{'DBI_PROFILE'} ? 0 : 1;
852             my $sth = $self->{dbh}->prepare_cached($sql, {}, $if_active)
853             or croak("Could not prepare $errtext: ".$self->{dbh}->errstr);
854             return $sth unless wantarray;
855             if ( $ENV{'DBI_PROFILE'} ) {
856             my @callerinfo = caller;
857             print STDERR "reusing handle for $sql (@callerinfo)===\n" if $sth->{Executed};
858             my $esth = $self->{dbh}->prepare_cached("EXPLAIN QUERY PLAN $sql", {}, 0)
859             or croak("Could not prepare explain query plan stmt: ".$self->{dbh}->errstr);
860             return $sth, [$esth, $sql];
861             }
862             else {
863             return $sth, undef};
864             };
865              
866             sub stmtExplain {
867             my ($self, $eref, @args) = @_;
868             my $esql = $eref->[1];
869             my @callerinfo = caller;
870             print STDERR "explain $esql\n\tfor data @args\n(@callerinfo)===\n";
871             my $esth = $eref->[0];
872             $esth->execute(@args) or croak("cannot execute explain statement $esql with args @args");
873             local $" = " | ";
874             while ( my $rowref = $esth->fetchrow_arrayref ) {
875             print STDERR "@$rowref\n";
876             }
877             print STDERR "===\n";
878             }
879              
880              
881             =head1 BUGS
882              
883              
884              
885             =head1 SUPPORT
886              
887             Send mail to the author
888              
889             =head1 AUTHOR
890              
891             Thomas Berger
892              
893             =head1 COPYRIGHT
894              
895             This program is free software; you can redistribute
896             it and/or modify it under the same terms as Perl itself.
897              
898             The full text of the license can be found in the
899             LICENSE file included with this module.
900              
901              
902             =head1 SEE ALSO
903              
904             perl(1).
905              
906             =cut
907              
908             #################### main pod documentation end ###################
909              
910             1;
911