File Coverage

blib/lib/SeeAlso/Source/BeaconAggregator.pm
Criterion Covered Total %
statement 21 23 91.3
branch n/a
condition n/a
subroutine 7 7 100.0
pod n/a
total 28 30 93.3


line stmt bran cond sub pod time code
1             package SeeAlso::Source::BeaconAggregator;
2 13     13   26042 use strict;
  13         13  
  13         337  
3 13     13   42 use warnings;
  13         9  
  13         248  
4              
5             BEGIN {
6 13     13   33 use Exporter ();
  13         58  
  13         170  
7 13     13   36 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  13         11  
  13         941  
8 13     13   22 $VERSION = '0.2_90';
9 13         103 @ISA = qw(Exporter);
10             #Give a hoot don't pollute, do not export more than needed by default
11 13         15 @EXPORT = qw();
12 13         15 @EXPORT_OK = qw();
13 13         231 %EXPORT_TAGS = ();
14             }
15              
16 13     13   49 use vars qw($DATA_VERSION);
  13         8  
  13         438  
17             $DATA_VERSION = 2;
18              
19 13     13   2616 use SeeAlso::Response;
  0            
  0            
20             use base ("SeeAlso::Source");
21              
22             use DBI qw(:sql_types);
23             use HTTP::Date;
24              
25             use CGI;
26             use Carp;
27              
28             #################### main pod documentation begin ###################
29             ## Below is the stub of documentation for your module.
30             ## You better edit it!
31              
32             =head1 NAME
33              
34             SeeAlso::Source::BeaconAggregator - Beacon files as source for SeeAlso::Server
35              
36             =head1 SYNOPSIS
37              
38             use CGI;
39             use SeeAlso::Identifier::ISSN;
40             use SeeAlso::Server;
41             use SeeAlso::Source::BeaconAggregator;
42              
43             my $srcdescription = {
44             "ShortName" => "TestService", # 16 Characters
45             "LongName" => "Sample SeeAlso Beacon Aggregator", # 48 characters
46             # "Description" => "The following services are contained: ...", # 1024 Characters
47             "DateModfied" => "...",
48             _dont_advertise => 1,
49             };
50              
51             my $CGI = CGI->new(); binmode(STDOUT, ":utf8");
52              
53             my $source = SeeAlso::Source::BeaconAggregator->new(
54             'file' => "/path/to/existing/database",
55             'identifierClass' => SeeAlso::Identifier::ISSN->new(),
56             'verbose' => 1,
57             'description' => $srcdescription,
58             );
59              
60             my $server = SeeAlso::Server->new (
61             'cgi' => $CGI,
62             xslt => "/client/showservice.xsl", # => +
63             clientbase => "/client/", # =>
64             expires => "+2d",
65             );
66              
67             my $rawid = $CGI->param('id') || "";
68             my $identifier = $rawid ? SeeAlso::Identifier::ISSN->new($rawid) : "";
69             my $result = $server->query($source, $identifier ? $identifier->value() : undef);
70             print $result;
71              
72              
73             =head1 DESCRIPTION
74              
75             This Module allows a collection of BEACON files (cf. http://de.wikipedia.org/wiki/Wikipedia:BEACON)
76             to be used as SeeAlso::Source (probably in the context of an SeeAlso::Server application).
77             Therefore it implements the four methods documented in SeeAlso::Source
78              
79             The BEACON files (lists of non-local identifiers of a certain type documenting the coverage of a given
80             online database plus means for access) are imported by the methods provided by
81             SeeAlso::Source::BeaconAggregator::Maintenance.pm, usually by employing the script sasbactrl.pl
82             as command line client.
83              
84             Serving other formats than SeeAlso or providing a BEACON file with respect to this
85             SeeAlso service is achieved by using SeeAlso::Source::BeaconAggregator::Publisher.
86              
87              
88             =head1 USAGE
89              
90              
91             =head2 Class methods
92              
93             =cut
94              
95             our %BeaconFields = ( # in den BEACON-Formaten definierte Felder
96             FORMAT => ['VARCHAR(16)', 1], # Pflicht
97             TARGET => ['VARCHAR(1024)', 1], # Pflicht, enthaelt {ID}
98             # PND-BEACON
99             VERSION => ['VARCHAR(16)'], # Only V0.1 supported
100             FEED => ['VARCHAR(255)'],
101             CONTACT => ['VARCHAR(63)'],
102             INSTITUTION => ['VARCHAR(1024)'],
103             ISIL => ['VARCHAR(64)'],
104             DESCRIPTION => ['VARCHAR(2048)'],
105             UPDATE => ['VARCHAR(63)'],
106             TIMESTAMP => ['INTEGER'],
107             REVISIT => ['INTEGER'],
108             # BEACON
109             EXAMPLES => ['VARCHAR(255)'],
110             MESSAGE => ['VARCHAR(255)'], # enthaelt {hits}
111             ONEMESSAGE => ['VARCHAR(255)'],
112             SOMEMESSAGE => ['VARCHAR(255)'],
113             PREFIX => ['VARCHAR(255)'],
114             # NEWER
115             COUNT => ['VARCHAR(255)'],
116             REMARK => ['VARCHAR(2048)'],
117             # WInofficial
118             NAME => ['VARCHAR(255)'],
119             # Experimental
120             ALTTARGET => ['VARCHAR(1024)'],
121             IMGTARGET => ['VARCHAR(1024)'],
122             );
123              
124              
125              
126             =head3 beaconfields ( [ $what ] )
127              
128             (Class method) Called without parameter returns an array of all valid field names
129             for meta headers
130              
131             @meta_supported = SeeAlso::Source::BeaconAggregator->beaconfields();
132              
133             With given parameter $what in scalar context returns the column
134             name of the database for the abstract field name. In array context
135             additionally the column type and optional flag designating a
136             mandatory entry are returned.
137              
138             $internal_col = SeeAlso::Source::BeaconAggregator->beaconfields('FORMAT');
139              
140             ($internal_col, $specs, $mandatory)
141             = SeeAlso::Source::BeaconAggregator->beaconfields('FORMAT');
142              
143             Fields are:
144              
145             # mandatory
146             FORMAT, TARGET
147             # as of BEACON spec
148             VERSION, FEED, TIMESTAMP, REVISIT, UPDATE
149             CONTACT, INSTITUTION, ISIL,
150             # from the experimental BEACON spec
151             MESSAGE, ONEMESSAGE, SOMEMESSAGE
152             PREFIX, EXAMPLES
153             # later additions
154             COUNT, REMARK
155             # current practise
156             NAME
157             # experimental extension "Konkordanzformat"
158             ALTTARGET, IMGTARGET
159              
160              
161             =cut
162              
163             sub beaconfields {
164             my ($class, $what) = @_;
165             return keys %BeaconFields unless $what;
166             return undef unless $BeaconFields{$what};
167             return wantarray ? ("bc$what", @{$BeaconFields{$what}}) : "bc$what";
168             }
169              
170              
171             our %OSDElements = ( # fuer OpensearchDescription deklarierte Felder
172             "ShortName" => "*", # <= 16 Zeichen, PFLICHT!
173             "Description" => "*", # <= 1024 Zeichen, PFLICHT!
174              
175             "Contact" => "*", # "nackte" Mailadresse user@domain, optional.
176             "Tags" => "*", # Liste von Einzelworten, <= 256 Zeichen, optional.
177             "LongName" => "*", # <= 48 Zeichen, optional.
178             "Developer" => "*", # <= 64 Zeichen, optional.
179             "Attribution" => "*", # <= 256 Zeichen, optional.
180             "SyndicationRight" => "open", # open, limited, private, closed
181             "AdultContent" => "false", # false/no/0: false, sonst: true
182              
183             "Language" => "*",
184             "InputEncoding" => "UTF-8",
185             "OutputEncoding" => "UTF-8",
186             # "dcterms:modified" => "",
187             # repeatable fields w/o contents, treated specially
188             # "Url" => {type => "*", template => "*"},
189             # "Query" => {role => "example", searchTerms => "*"},
190             # Special for the SeeAlso::Family
191             "Example" => "*",
192             "Examples" => "*",
193             "BaseURL" => "*", # Auto
194             "DateModified" => "*", # alias for dcterms:modified
195             "Source" => "*",
196             );
197              
198              
199             =head3 osdKeys ( [ $what ] )
200              
201             (Class method) Called without parameter returns an array of all valid element names
202             for the OpenSearchDescription:
203              
204             @meta_names = SeeAlso::Source::BeaconAggregator->osdKeys();
205              
206             With given parameter $what returns the value for the given OpenSearchDescription
207             element:
208              
209             $osd_value = SeeAlso::Source::BeaconAggregator->beaconfields('LongName');
210              
211             OSD elements are
212              
213             ShortName, Description
214             Contact, Tags, LongName, Developer, Attribution, SyndicationRight, AdultContent
215             Language, InputEncoding, OutputEncoding
216             # special for SeeAlso::Family
217             Example, Examples, BaseURL, DateModified, Source
218              
219             =cut
220              
221             sub osdKeys {
222             my ($class, $what) = @_;
223             return keys %OSDElements unless $what;
224             return undef unless $OSDElements{$what};
225             return $OSDElements{$what};
226             }
227              
228              
229             =head2 SeeAlso::Source methods
230              
231             =head3 new( %accessor [, %options ] )
232              
233             Creates the SeeAlso::Source::BeaconAggregator object and connects to an existing
234             database previously created with the methods from
235             SeeAlso::Source::BeaconAggregator::Maintenance (currently SQLlite)
236              
237             Accessor options:
238              
239             =over 8
240              
241             =item dbh
242              
243             handle of a database already connected to
244              
245             =item dbroot
246              
247             optional path to prepend to dsn or file
248              
249             =item dsn
250              
251             directory name (directory contains the database file "-db"
252              
253             =item file
254              
255             full path of the database
256              
257             =back
258              
259             Other options:
260              
261             =over 8
262              
263             =item identifierClass
264              
265             contains an already instantiated object of that class
266              
267             =item verbose (0|1)
268              
269             =item description
270              
271             Hashref with options to be piped through to SeeAlso::Source
272              
273             =item aliasfilter
274              
275             Hashref with aliases to be filtered out from query results
276              
277             =item cluster
278              
279             dsn of a beacon source of identical identifier type giving a mapping (hash / altid)
280             e.g. invalidated identifiers -> current identifiers.
281              
282             When the identifier supplied for query() is mentioned in this table, the query will be
283             executed against the associated current identifier and all invalidated ones
284             (backward translation of forward translation).
285              
286             When not (the mapping might not necessarily include the identiy mapping),
287             the query behaves as if no "cluster" was given.
288              
289             For translation between different identifier schemes before querying,
290             use an appropriate SeeAlso::Identifier class.
291              
292              
293             =back
294              
295             Returns undef if unable to DBI->connect() to the database.
296              
297             =cut
298              
299             sub new {
300             my ($class, %options) = @_;
301             my $self = {%options};
302             bless($self, $class);
303              
304             if ( $self->{dsn} ) {
305             croak("no special characters allowed for dsn") unless $self->{dsn} =~ /^[\w!,.{}-]+$/};
306              
307             if ( $self->{dbroot} ) {
308             return undef unless -d $self->{dbroot};
309             $self->{dbroot} .= "/" unless $self->{dbroot} =~ m!/$!;
310             };
311              
312             my $dbfile;
313             if ( $self->{dbh} ) { # called with handle...
314             return $self;
315             }
316             elsif ( $self->{dsn} ) {
317             $dbfile = $self->{dsn}."/".$self->{dsn}."-db";
318             (substr($dbfile, 0, 0) = $self->{dbroot}) if $self->{dbroot};
319             }
320             elsif ( $dbfile = $self->{file} ) {
321             if ( $self->{dbroot} ) {
322             substr($dbfile, 0, 0) = $self->{dbroot}};
323             };
324              
325             return undef unless $dbfile;
326              
327             my $dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", "", "",
328             {
329             # RaiseError => 1,
330             sqlite_unicode => 1,
331             });
332             return undef unless $dbh;
333             $self->{dbh} = $dbh;
334              
335             if ( $self->{cluster} ) {
336             my $clusterfile = $self->{cluster}."/".$self->{cluster}."-db";
337             (substr($clusterfile, 0, 0) = $self->{dbroot}) if $self->{dbroot};
338             $dbh->do("ATTACH DATABASE '$clusterfile' AS cluster") or croak("error attaching cluster database '$clusterfile'");
339             };
340              
341             return $self;
342             }
343              
344              
345             =head3 description ()
346              
347             Inherited from SeeAlso::Source.
348              
349             =cut
350              
351             sub description {
352             my $self = shift;
353             $self->enrichdescription() unless $self->{descriptioncached};
354             return $self->SUPER::description(@_);
355             }
356              
357             =head3 about ()
358              
359             Inherited from SeeAlso::Source.
360              
361             =cut
362              
363             sub about {
364             my $self = shift;
365             $self->enrichdescription() unless $self->{descriptioncached};
366             return $self->SUPER::about(@_);
367             }
368              
369              
370             sub enrichdescription {
371             my ($self) = @_;
372             my $rawref = $self->OSDValues();
373             my %result;
374             foreach ( keys %$rawref ) {
375             next unless $rawref->{$_};
376             if ( ref($rawref->{$_}) ) { # List
377             if ( $_ =~ /^Example/ ) {
378             my @ary;
379             foreach my $item ( @{$rawref->{$_}} ) {
380             next unless $item;
381             my($i, $r) = split(/\s*\|\s*/, $item, 2);
382             next unless $i;
383             if ( $r ) {
384             push(@ary, {'id'=>$i, 'response'=>$r})}
385             else {
386             push(@ary, {'id'=>$i})}
387             }
388             $result{$_} = \@ary if @ary;
389             }
390             else {
391             $result{$_} = join(";\n", @{$rawref->{$_}})};
392             }
393             else { # Scalar
394             if ( $_ =~ /^Example/ ) {
395             my($i, $r) = split(/\s*\|\s*/, $rawref->{$_}, 2);
396             next unless $i;
397             if ( $r ) {
398             $result{$_} = [{'id'=>$i, 'response'=>$r}]}
399             else {
400             $result{$_} = [{'id'=>$i}]}
401             }
402             else {
403             $result{$_} = $rawref->{$_}};
404             }
405             };
406              
407              
408             if ( $self->{description} ) {
409             my %combined = (%result, %{$self->{description}});
410             $self->{description} = \%combined;
411             }
412             elsif ( %result ) {
413             $self->{description} = \%result};
414              
415             $self->{descriptioncached} = 1;
416             }
417              
418             ### Antworten fuer Anfragen als Format seealso
419              
420             =head3 set_aliasfilter ( @aliaslist )
421              
422             Init the hash with
423              
424             =cut
425              
426             sub set_aliasfilter {
427             my ($self, @aliaslist) = @_;
428             $self->{'aliasfilter'} = { map { ($_, "") } @aliaslist };
429             return $self->{'aliasfilter'};
430             }
431              
432             =head3 query( [ $identifier] )
433              
434             Returns a SeeAlso::Response listing all matches to the given string or
435             SeeAlso::Identifier $identifier.
436              
437             =cut
438              
439             sub query { # SeeAlso-Simple response
440             my ($self, $query) = @_;
441             my ($hash, $pretty, $canon) = $self->prepare_query($query);
442             my $response = SeeAlso::Response->new($canon);
443              
444             my $clusterid;
445             if ( $self->{cluster} ) {
446             my ($clusterh, $clusterexpl) = $self->stmtHdl("SELECT beacons.altid FROM cluster.beacons WHERE beacons.hash=? OR beacons.altid=? LIMIT 1;");
447             $self->stmtExplain($clusterexpl, $hash, $hash) if $ENV{'DBI_PROFILE'};
448             $clusterh->execute($hash, $hash);
449             while ( my $onerow = $clusterh->fetchrow_arrayref() ) {
450             $clusterid = $onerow->[0];}
451             }
452              
453             my ( $tfield, $afield, $mfield, $m1field, $msfield, $dfield, $nfield, $ifield)
454             = map{ scalar $self->beaconfields($_) }
455             # 6 7 8 9 10 11 12 13
456             qw(TARGET ALTTARGET MESSAGE ONEMESSAGE SOMEMESSAGE DESCRIPTION NAME INSTITUTION);
457             # 0 1 2 3 4 5
458             # 14 15
459             my ($sth, $sthexpl);
460             if ( $clusterid ) { # query IN cluster (leader id might not exist at LHS, therefore unionize with beacons.hash=$clusterid (!)
461             ($sth, $sthexpl) = $self->stmtHdl(<<"XxX");
462             SELECT beacons.hash, beacons.altid, beacons.seqno, beacons.hits, beacons.info, beacons.link,
463             repos.$tfield, repos.$afield, repos.$mfield, repos.$m1field, repos.$msfield, repos.$dfield, repos.$nfield, repos.$ifield,
464             repos.sort, repos.alias
465             FROM beacons NATURAL LEFT JOIN repos
466             WHERE ( (beacons.hash=?)
467             OR (beacons.hash IN (SELECT cluster.beacons.hash FROM cluster.beacons WHERE cluster.beacons.altid=?)) )
468             ORDER BY repos.sort, repos.alias;
469             XxX
470             $self->stmtExplain($sthexpl, $clusterid, $clusterid) if $ENV{'DBI_PROFILE'};
471             $sth->execute($clusterid, $clusterid) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr);
472             }
473             else { # simple query
474             ($sth, $sthexpl) = $self->stmtHdl(<<"XxX");
475             SELECT beacons.hash, beacons.altid, beacons.seqno, beacons.hits, beacons.info, beacons.link,
476             repos.$tfield, repos.$afield, repos.$mfield, repos.$m1field, repos.$msfield, repos.$dfield, repos.$nfield, repos.$ifield,
477             repos.sort, repos.alias
478             FROM beacons NATURAL LEFT JOIN repos
479             WHERE beacons.hash=?
480             ORDER BY repos.sort, repos.alias;
481             XxX
482             $self->stmtExplain($sthexpl, $hash) if $ENV{'DBI_PROFILE'};
483             $sth->execute($hash) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr);
484             }
485              
486             my $c = $self->{identifierClass} || undef;
487             my %didalready;
488             while ( my $onerow = $sth->fetchrow_arrayref() ) {
489             # last unless defined $onerow->[0]; # strange end condition
490             next if $onerow->[15] && exists $self->{'aliasfilter'}->{$onerow->[15]};
491              
492             my $hits = $onerow->[3];
493              
494             my $h = $onerow->[0];
495             my $p;
496             if ( $h eq $hash ) {
497             $p = $pretty}
498             elsif ( $clusterid && ref($c) ) {
499             $c->value("");
500             my $did = $c->hash($h) || $c->value($h) || $h;
501             $p = $c->can("pretty") ? $c->pretty() : $c->value();
502             };
503             $p = ($clusterid ? $h : $pretty) unless defined $p;
504              
505             my $uri;
506             if ( $uri = $onerow->[5] ) { # Expliziter Link
507             }
508             elsif ( $onerow->[1] && $onerow->[7] ) { # Konkordanzformat
509             $uri = sprintf($onerow->[7], $p, urlpseudoescape($onerow->[1]))}
510             elsif ( $onerow->[6] ) { # normales Beacon-Format
511             $uri = sprintf($onerow->[6], $p)}
512             elsif ( $onerow->[7] ) { # Neues Format
513             $uri = sprintf($onerow->[7], $p, urlpseudoescape($p))};
514             next unless $uri;
515              
516             # MESSAGE || NAME || INSTITUTION || DESCRIPTION
517             my $label = $onerow->[8] || $onerow->[12] || $onerow->[13] || $onerow->[11] || "???";
518             if ( $hits == 1 ) {
519             $label = $onerow->[9] if $onerow->[9]}
520             elsif ( $hits == 0 ) {
521             $label = $onerow->[10] if $onerow->[10]}
522             elsif ( $hits ) {
523             ($label .= " (%s)") unless ($label =~ /(^|[^%])%s/)};
524              
525             $label = sprintf($label, $hits);
526             $onerow->[4] = "" unless defined $onerow->[4];
527              
528             # my $description = $hits; # entsprechend opensearchsuggestions: pleonastisch, langweilig
529             # my $description = $onerow->[12] || $onerow->[13] || $onerow->[8] || $onerow->[10] || $onerow->[5]; # NAME or INSTITUTION or SOMEMESSAGE or MESSAGE
530             # DESCRIPTION || INSTITUTION || NAME || SOMEMESSAGE || MESSAGE || alias
531             my $description = $onerow->[11] || $onerow->[13] || $onerow->[12] || $onerow->[10] || $onerow->[8] || $onerow->[15] || ""; # INSTITUTION or NAME or SOMEMESSAGE or MESSAGE
532              
533             # Anreicherungen
534             if ( ($onerow->[4] =~ /\d{2}/) and ($onerow->[4] !~ /[a-wyz]/) ) {
535             $description .= " [".$onerow->[4]."]"} # add info
536             else {
537             # $onerow->[1] = "" unless defined $onerow->[1];
538             $label .= " [".$onerow->[4]."]" if $onerow->[4]; # add info
539             $description .= " [".$onerow->[1]."]" if $onerow->[1]; # Add target identifier
540             };
541              
542             $response->add($label, $description, $uri) unless $didalready{join("\x7f", $label, $description, $uri)}++;
543             }
544              
545             return $response;
546             }
547              
548             sub prepare_query {
549             my ($self, $query) = @_;
550             my ($hash, $pretty, $canon);
551             # search by: $hash
552             # forward by: $pretty
553             # normalize by: $canon
554             my $c = $self->{identifierClass};
555             if ( defined $c ) { # cast!
556             my $qval = ref($query) ? $query->as_string : $query;
557             $c->value($qval);
558             $hash = $c->hash();
559             $pretty = $c->can("pretty") ? $c->pretty() : $c->value();
560             $canon = $c->can("canonical") ? $c->canonical() : $c->value();
561             }
562             elsif ( ref($query) ) {
563             $hash = $query->hash();
564             $pretty = $query->can("pretty") ? $query->pretty() : $query->value();
565             $canon = $query->can("canonical") ? $query->canonical() : $query->value();
566             }
567             else {
568             $hash = $pretty = $canon = $query};
569              
570             return ($hash, $pretty, $canon);
571             }
572              
573              
574             ###
575              
576             =head2 Auxiliary Methods
577              
578             Sequence numbers (Seqnos) are primary keys to the database table where
579             each row contains the meta fields of one BEACON file
580              
581              
582             =head3 Seqnos ( $colname , $query )
583              
584             Return Seqnos from querying the table with all beacon headers in
585             column (field name) $colname for a $query
586             (which may contain SQL placeholders '%').
587              
588             =cut
589              
590             sub Seqnos {
591             my ($self, $colname, $query) = @_;
592              
593             $colname ||= "";
594             $query ||= "";
595              
596             my $constraint = "";
597             if ( $query ) {
598             my $dbcolname = "";
599             if ( $colname =~ /^_(\w+)$/ ) {
600             $dbcolname = $1}
601             elsif ( $dbcolname = $self->beaconfields($colname) ) {}
602             else {
603             croak("column name '$colname' not known. Aborting")};
604              
605             $constraint = ($query =~ /%/) ? "WHERE $dbcolname LIKE ?"
606             : "WHERE $dbcolname=?";
607             };
608              
609             my $sth = $self->stmtHdl(<<"XxX");
610             SELECT seqno FROM repos $constraint ORDER BY seqno;
611             XxX
612             my $aryref = $self->{dbh}->selectcol_arrayref($sth, {Columns=>[1]}, ($query ? ($query) : ()))
613             or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr);
614             return $aryref ? (@$aryref) : ();
615             }
616              
617              
618             =head3 RepoCols ( [ $colname [, $seqno_or_alias ]] )
619              
620             Return a hashref indexed by seqence number of all values of column (header field) $colname [alias]
621             optionally constrained by a SeqNo or Alias.
622              
623             Default for $colname is '_alias'.
624              
625             =cut
626              
627              
628             sub RepoCols {
629             my ($self, $colname, $seqno_or_alias) = @_;
630             $colname ||= "_alias";
631             $seqno_or_alias ||= "";
632              
633             my $dbcolname = "";
634             if ( $colname =~ /^_(\w+)$/ ) {
635             $dbcolname = $1}
636             elsif ( $dbcolname = $self->beaconfields($colname) ) {}
637             else {
638             croak("column name '$colname' not known. Aborting")};
639              
640             my ($constraint, @cval) = mkConstraint($seqno_or_alias);
641             my $sth = $self->stmtHdl(<<"XxX");
642             SELECT seqno, $dbcolname FROM repos $constraint ORDER BY alias;
643             XxX
644             my $aryref = $self->{dbh}->selectcol_arrayref($sth, {Columns=>[1..2]}, @cval)
645             or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr);
646             if ( $aryref ) {
647             my %hash = @$aryref;
648             return \%hash;
649             };
650             return undef;
651             }
652              
653             sub mkConstraint {
654             local ($_) = @_;
655             return ("", ()) unless defined $_;
656             if ( /^%*$/ ) { return ("", ()) }
657             elsif ( /^\d+$/ ) { return (" WHERE seqno=?", $_) }
658             elsif ( /%/ ) { return (" WHERE alias LIKE ?", $_) }
659             elsif ( $_ ) { return (" WHERE alias=?", $_) }
660             else { return ("", ()) };
661             }
662              
663             =head3 OSDValues ( [ $key ] )
664              
665             Returns a hashref containing the OpenSearchDescription keywords and their
666             respective values.
667              
668             =cut
669              
670             sub OSDValues {
671             my ($self, $key) = @_;
672             $key ||= "";
673              
674             my $constraint = "";
675             if ( $key =~ /%/ ) {
676             $constraint = " WHERE (key LIKE ?)"}
677             elsif ( $key ) {
678             $constraint = " WHERE (key=?)"};
679              
680             my ($sth, $sthexpl) = $self->stmtHdl(<<"XxX");
681             SELECT key, val FROM osd $constraint;
682             XxX
683             $self->stmtExplain($sthexpl, ($key ? ($key) : ())) if $ENV{'DBI_PROFILE'};
684             $sth->execute(($key ? ($key) : ())) or croak("Could not execute >".$sth->{Statement}."<: ".$sth->errstr);
685              
686             my %result = ();
687             while ( my $aryref = $sth->fetchrow_arrayref ) {
688             my ($key, $val) = @$aryref;
689             # last unless defined $key; # undef on first call if nothing to be delivered?
690             next if $key =~ /^bc/; # BeaconMeta Fields smuggled in
691             if ( exists $result{$key} ) {
692             if ( ref($result{$key}) ) {
693             push(@{$result{$key}}, $val)}
694             else {
695             $result{$key} = [$result{$key}, $val]};
696             }
697             elsif ( $key eq "DateModified" ) {
698             $result{$key} = tToISO($val)}
699             else {
700             $result{$key} = $val};
701             };
702             return undef unless %result;
703             return \%result;
704             }
705              
706             =head3 admhash ( )
707              
708             Returns a hashref with the contents of the admin table (readonly, not tied).
709              
710             =cut
711              
712             sub admhash {
713             my $self = shift;
714              
715             my ($admh, $admexpl) = $self->stmtHdl("SELECT key, val FROM admin;")
716             or croak("Could not prepare statement (dump admin table)".$self->{dbh}->errstr);
717             $self->stmtExplain($admexpl) if $ENV{'DBI_PROFILE'};
718             $admh->execute() or croak("Could not execute statement (dump admin table): ".$admh->errstr);
719             my %adm = ();
720             while ( my $onerow = $admh->fetchrow_arrayref() ) {
721             if ( $admh->err ) {
722             croak("Could not iterate through admin table: ".$admh->errstr)};
723             my ($key, $val) = @$onerow;
724             $adm{$key} = (defined $val) ? $val : "";
725             };
726             return \%adm;
727             }
728              
729              
730             =head3 autoIdentifier ()
731              
732             Initializes a missing C from the IDENTIFIER_CLASS entry in the admin table.
733              
734             =cut
735              
736             sub autoIdentifier {
737             my ($self) = @_;
738              
739             return $self->{identifierClass} if exists $self->{identifierClass} && ref($self->{identifierClass});
740              
741             my ($admich, $admichexpl) = $self->stmtHdl("SELECT key, val FROM admin WHERE key=?;")
742             or croak("Could not prepare statement (dump admin table)".$self->{dbh}->errstr);
743             $self->stmtExplain($admichexpl, 'IDENTIFIER_CLASS') if $ENV{'DBI_PROFILE'};
744             $admich->execute('IDENTIFIER_CLASS') or croak("Could not execute statement (IDENTIFIER_CLASS from admin table): ".$admich->errstr);
745             my %adm = ();
746             while ( my $onerow = $admich->fetchrow_arrayref() ) {
747             if ( $admich->err ) {
748             croak("Could not iterate through admin table: ".$admich->errstr)};
749             my ($key, $val) = @$onerow;
750             $adm{$key} = $val || "";
751             };
752              
753             if ( my $package = $adm{"IDENTIFIER_CLASS"} ) {
754             eval { $self->{identifierClass} = $package->new() };
755             return $self->{identifierClass} unless $@;
756              
757             eval {
758             (my $pkgpath = $package) =~ s=::=/=g; # require needs path...
759             require "$pkgpath.pm";
760             import $package;
761             };
762             if ( $@ ) {
763             croak "sorry: Identifier Class $package cannot be imported\n$@"};
764              
765             return $self->{identifierClass} = $package->new();
766             };
767             return undef;
768             }
769              
770              
771             =head3 findExample ( $goal, $offset, [ $sth ])
772              
773             Returns a hashref
774              
775             { id => identier,
776             response => Number of beacon files matching "/" Sum of individual hit counts
777             }
778              
779             for the C<$offset>'th identifier occuring in at least C<$goal> beacon instances.
780              
781             $sth will be initialized by a statement handle to pass to subsequent calls if
782             defined but false.
783              
784             =cut
785              
786             sub findExample {
787             my ($self, $goal, $offset, $sth) = @_;
788             my $sthexpl;
789             unless ( $sth ) {
790             ($sth, $sthexpl) = $self->stmtHdl(<<"XxX");
791             SELECT hash, COUNT(*), SUM(hits) FROM beacons GROUP BY hash HAVING COUNT(*)>=? LIMIT 1 OFFSET ?;
792             XxX
793             #
794             $_[3] = $sth if defined $_[3];
795             };
796             $offset ||= 0;
797             $sth->bind_param(1, $goal, SQL_INTEGER);
798             $sth->bind_param(2, $offset, SQL_INTEGER);
799             if ( $sthexpl && $ENV{'DBI_PROFILE'} ) {
800             $sthexpl->[0]->bind_param(1, $goal, SQL_INTEGER);
801             $sthexpl->[0]->bind_param(2, $offset, SQL_INTEGER);
802             $self->stmtExplain($sthexpl);
803             };
804             $sth->execute() or croak("Could not execute canned sql (findExample): ".$sth->errstr);
805             if ( my $onerow = $sth->fetchrow_arrayref ) {
806             if ( defined $self->{identifierClass} ) {
807             my $c = $self->{identifierClass};
808             # compat: hash might not take an argument, must resort to value, has to be cleared before...
809             $c->value("");
810             my $did = $c->hash($onerow->[0]) || $c->value($onerow->[0]);
811             my $expanded = $c->can("pretty") ? $c->pretty() : $c->value();
812             return {id=>$expanded, response=>"$onerow->[1]/$onerow->[2]"};
813             }
814             else {
815             return {id=>$onerow->[0], response=>"$onerow->[1]/$onerow->[2]"}};
816             };
817             return undef;
818             };
819              
820             # Date prettyprint
821              
822             sub tToISO {
823             local($_) = HTTP::Date::time2isoz($_[0] || 0);
824             tr[ ][T];
825             return $_;
826             }
827              
828             # URL-encode data
829             sub urlpseudoescape { # we don't do a thorough job here, because it is not clear whether
830             # /a/b/c is a parameter ("/" must be encoded) or part of a path ("/" must not be encoded)
831             # and we must avoid URL-escaping already escaped content
832             # Therefore we only escape spaces and characters > 127
833             local ($_) = @_;
834             # $_ = pack("C0a*", $_); # Zeichen in Bytes zwingen
835             utf8::encode($_); # Zeichen in Bytes zwingen
836             # FYI
837             # reserved uri characters: [;/?:@&=+$,] by RFC 3986
838             # ;=%3B /=%2F ?=%3F :=%3A @=%40 &=%26 ==%3D +=%2B $=%24 ,=%2C
839             # delims = [<>#%"], unwise = [{}|\\\^\[\]`]
840             # mark (nreserved) = [-_.!~*'()]
841             # 222222257
842             # 1789ACEFE
843             # s/([^a-zA-Z0-9!'()*\-._~])/sprintf("%%%02X",ord($1))/eg;
844             s/([^\x21-\x7e])/sprintf("%%%02X",ord($1))/eg;
845             return $_;
846             }
847              
848              
849             # SQL handle management
850             sub stmtHdl {
851             my ($self, $sql, $errtext) = @_;
852             $errtext ||= $sql;
853             my $if_active = $ENV{'DBI_PROFILE'} ? 0 : 1;
854             my $sth = $self->{dbh}->prepare_cached($sql, {}, $if_active)
855             or croak("Could not prepare $errtext: ".$self->{dbh}->errstr);
856             return $sth unless wantarray;
857             if ( $ENV{'DBI_PROFILE'} ) {
858             my @callerinfo = caller;
859             print STDERR "reusing handle for $sql (@callerinfo)===\n" if $sth->{Executed};
860             my $esth = $self->{dbh}->prepare_cached("EXPLAIN QUERY PLAN $sql", {}, 0)
861             or croak("Could not prepare explain query plan stmt: ".$self->{dbh}->errstr);
862             return $sth, [$esth, $sql];
863             }
864             else {
865             return $sth, undef};
866             };
867              
868             sub stmtExplain {
869             my ($self, $eref, @args) = @_;
870             my $esql = $eref->[1];
871             my @callerinfo = caller;
872             print STDERR "explain $esql\n\tfor data @args\n(@callerinfo)===\n";
873             my $esth = $eref->[0];
874             $esth->execute(@args) or croak("cannot execute explain statement $esql with args @args");
875             local $" = " | ";
876             while ( my $rowref = $esth->fetchrow_arrayref ) {
877             print STDERR "@$rowref\n";
878             }
879             print STDERR "===\n";
880             }
881              
882              
883             =head1 BUGS
884              
885              
886              
887             =head1 SUPPORT
888              
889             Send mail to the author
890              
891             =head1 AUTHOR
892              
893             Thomas Berger
894              
895             =head1 COPYRIGHT
896              
897             This program is free software; you can redistribute
898             it and/or modify it under the same terms as Perl itself.
899              
900             The full text of the license can be found in the
901             LICENSE file included with this module.
902              
903              
904             =head1 SEE ALSO
905              
906             perl(1).
907              
908             =cut
909              
910             #################### main pod documentation end ###################
911              
912             1;
913