File Coverage

blib/lib/Noid.pm
Criterion Covered Total %
statement 22 24 91.6
branch n/a
condition n/a
subroutine 8 8 100.0
pod n/a
total 30 32 93.7


line stmt bran cond sub pod time code
1             package Noid;
2              
3 2     2   2174 use 5.000000;
  2         6  
  2         91  
4 2     2   10 use strict;
  2         4  
  2         69  
5 2     2   11 use warnings;
  2         6  
  2         381  
6              
7             require Exporter;
8             our @ISA = qw(Exporter);
9              
10             our $VERSION;
11             $VERSION = sprintf "%d.%02d", q$Name: Release-0-424 $ =~ /Release-(\d+)-(\d+)/;
12             our @EXPORT_OK = qw(
13             addmsg bind checkchar dbopen dbclose dbcreate dbinfo
14             errmsg fetch getnoid hold hold_release hold_set
15             locktest logmsg mint n2xdig note parse_template queue
16             sample scope validate VERSION xdig
17             );
18              
19             # Noid - Nice opaque identifiers (Perl module)
20             #
21             # Author: John A. Kunze, jak@ucop.edu, California Digital Library
22             # Originally created, UCSF/CKM, November 2002
23             #
24             # ---------
25             # Copyright (c) 2002-2006 UC Regents
26             #
27             # Permission to use, copy, modify, distribute, and sell this software and
28             # its documentation for any purpose is hereby granted without fee, provided
29             # that (i) the above copyright notices and this permission notice appear in
30             # all copies of the software and related documentation, and (ii) the names
31             # of the UC Regents and the University of California are not used in any
32             # advertising or publicity relating to the software without the specific,
33             # prior written permission of the University of California.
34             #
35             # THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
36             # EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
37             # WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
38             #
39             # IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE FOR ANY
40             # SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
41             # OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
42             # WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY
43             # THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE
44             # OR PERFORMANCE OF THIS SOFTWARE.
45             # ---------
46              
47             # Perl style note -- this code makes frequent use of a fast, big boolean
48             # version of an if-elsif-else idiom that Perl encourages because entering
49             # a { block } is relatively expensive, but it looks strange if you're not
50             # used to it. Instead of
51             #
52             # if ( e1 && e2 && e3 ) {
53             # s1;
54             # s2;
55             # ...;
56             # }
57             # elsif ( e4 || e5 && e6 ) {
58             # s3;
59             # }
60             # else {
61             # s4;
62             # s5;
63             # }
64             #
65             # we can write this series of test expressions and statements as
66             #
67             # e1 && e2 && e3 and
68             # s1,
69             # s2,
70             # 1 or
71             # e4 || e5 && e6 and
72             # s3,
73             # 1 or
74             # s4,
75             # s5,
76             # 1;
77             #
78             # If we KNOW (not safest) that s2 and s3 are "true", we shorten it to
79             #
80             # e1 && e2 && e3 and
81             # s1,
82             # s2
83             # or
84             # e4 || e5 && e6 and
85             # s3
86             # or
87             # s4,
88             # s5
89             # ;
90             #
91             # For the big boolean form to work, you'll be well-advised to call make
92             # your Perl calls with the parenthesized syntax, so that the commas
93             # terminating the boolean statements don't get swallowed up by the Perl
94             # functions and built-ins that you're using (eg, can get into trouble
95             # unless you parenthesize your "print" statements).
96              
97             # yyy many comment blocks are very out of date -- need thorough review
98             # yyy make it so that http://uclibs.org/PID/foo maps to
99             # ark.cdlib.org/ark:/13030/xzfoo [ requirement from SCP meeting May 2004]
100             # yyy use "wantarray" function to return either number or message
101             # when bailing out.
102             # yyy add cdlpid doc to pod ?
103             # yyy write about comparison with PURLs
104             # yyy check chars, authentication, ordinal stored in metadata
105             # yyy implement mod 4/8/16 distribution within large counter regions?
106             # yyy implement count-down counters as well as count-up?
107             # yyy make a shadow DB
108              
109             # yyy upgrade ark-service and ERC.pm (which still use PDB.pm)
110              
111             # yyy bindallow(), binddeny() ????
112              
113 2     2   11 use constant NOLIMIT => -1;
  2         4  
  2         190  
114 2     2   12 use constant SEQNUM_MIN => 1;
  2         3  
  2         91  
115 2     2   9 use constant SEQNUM_MAX => 1000000;
  2         4  
  2         129  
116              
117             # The database must hold nearly arbitrary user-level identifiers
118             # alongside various admin variables. In order not to conflict, we
119             # require all admin variables to start with ":/", eg, ":/oacounter".
120             # We use "$R/" frequently as our "reserved root" prefix.
121             #
122             my $R = ":"; # prefix for global top level of admin db variables
123              
124 2     2   8 use Fcntl qw(:DEFAULT :flock);
  2         4  
  2         1143  
125 2     2   3331 use BerkeleyDB;
  0            
  0            
126              
127             # Global %opendbtab is a hash that maps a hashref (as key) to a database
128             # reference. At a minimum, we need opendbtab so that we avoid passing a
129             # db reference to dbclose, which cannot do the final "untie" (see
130             # "untie gotcha" documentation) while the caller's db reference is
131             # still defined.
132             #
133             my %opendbtab;
134              
135             # To iterate over all Noids in the database, use
136             #
137             # each %hash
138             # return $db or null
139             # $flags one of O_RDONLY, O_RDWR, O_CREAT
140              
141             our ($legalstring, $alphacount, $digitcount);
142             our $locktest = 0;
143              
144             # Adds an error message for a database pointer/object. If the message
145             # pertains to a failed open, the pointer is null, in which case the
146             # message gets saved to what essentially acts like a global (possible
147             # threading conflict).
148             #
149             sub addmsg{ my( $noid, $message )=@_;
150              
151             $noid ||= ""; # act like a global in case $noid undefined
152             $opendbtab{"msg/$noid"} .= $message . "\n";
153             return 1;
154             }
155              
156             # Returns accumulated messages for a database pointer/object. If the
157             # second argument is non-zero, also reset the message to the empty string.
158             #
159             sub errmsg{ my( $noid, $reset )=@_;
160              
161             $noid ||= ""; # act like a global in case $noid undefined
162             my $s = $opendbtab{"msg/$noid"};
163             $reset and
164             $opendbtab{"msg/$noid"} = "";
165             return $s;
166             }
167              
168             sub logmsg{ my( $noid, $message )=@_;
169              
170             $noid ||= ""; # act like a global in case $noid undefined
171             my $logfhandle = $opendbtab{"log/$noid"};
172             defined($logfhandle) and
173             print($logfhandle $message, "\n");
174             # yyy file was opened for append -- hopefully that means always
175             # append even if others have appended to it since our last append;
176             # possible sync problems...
177             return 1;
178             }
179              
180             sub storefile { my( $fname, $contents )=@_;
181             ! open(OUT, ">$fname") and
182             return 0;
183             print OUT $contents;
184             close(OUT);
185             return 1;
186             }
187              
188             # Legal values of $how for the bind function.
189             #
190             my @valid_hows = qw(
191             new replace set
192             append prepend add insert
193             delete purge mint peppermint
194             );
195              
196             #
197             # --- begin alphabetic listing (with a few exceptions) of functions ---
198             #
199              
200             # Returns ANVL message on success, undef on error.
201             #
202             sub bind { my( $noid, $contact, $validate, $how, $id, $elem, $value )=@_;
203             # yyy to add: incr, decr for $how; possibly other ops (* + - / **)
204              
205             # Validate identifier and element if necessary.
206             #
207             # yyy to do: check $elem against controlled vocab
208             # (for errors more than for security)
209             # yyy should this genonly setting be so capable of contradicting
210             # the $validate arg?
211             $$noid{"$R/genonly"} && $validate
212             && ! validate($noid, "-", $id) and
213             return(undef)
214             or
215             ! defined($id) || $id eq "" and
216             addmsg($noid, "error: bind needs an identifier specified."),
217             return(undef)
218             ;
219             ! defined($elem) || $elem eq "" and
220             addmsg($noid, qq@error: "bind $how" requires an element name.@),
221             return(undef);
222              
223             # Transform and place a "hold" (if "long" term and we're not deleting)
224             # on a special identifier. Right now that means a user-entrered Id
225             # of the form :idmap/Idpattern. In this case, change it to a database
226             # Id of the form "$R/idmap/$elem", and change $elem to hold Idpattern;
227             # this makes lookup faster and easier.
228             #
229             # First save original id and element names in $oid and $oelem to
230             # use for all user messages; we use whatever is in $id and $elem
231             # for actual database operations.
232             #
233             my ($oid, $oelem, $hold) = ($id, $elem, 0);
234             if ($id =~ /^:/) {
235             $id !~ m|^:idmap/(.+)| and
236             addmsg($noid, qq@error: $oid: id cannot begin with ":"@
237             . qq@ unless of the form ":idmap/Idpattern".@),
238             return(undef);
239             ($id, $elem) = ("$R/idmap/$oelem", $1);
240             $$noid{"$R/longterm"} and
241             $hold = 1;
242             }
243             # yyy transform other ids beginning with ":"?
244              
245             # Check circulation status. Error if term is "long" and the id
246             # hasn't been issued unless a hold was placed on it.
247             #
248             # If no circ record and no hold...
249             if (! defined($$noid{"$id\t$R/c"}) && ! exists($$noid{"$id\t$R/h"})) {
250             $$noid{"$R/longterm"} and
251             addmsg($noid, "error: "
252             . qq@$oid: "long" term disallows binding @
253             . "an unissued identifier unless a hold is "
254             . "first placed on it."),
255             return(undef)
256             or
257             logmsg($noid, "warning:"
258             . " $oid: binding an unissued identifier"
259             . " that has no hold placed on it.")
260             ;
261             }
262             if (grep(/^$how$/, @valid_hows) != 1) {
263             addmsg($noid, "error: bind how? What does $how mean?");
264             return(undef);
265             }
266             my $peppermint = ($how eq "peppermint");
267             $peppermint and
268             # yyy to do
269             addmsg($noid, qq@error: bind "peppermint" not implemented.@),
270             return(undef);
271             # YYY bind mint file Elem Value -- put into FILE by itself
272             # YYY bind mint stuff_into_big_file Elem Value -- cat into file
273             if ($how eq "mint" || $how eq "peppermint") {
274             $id ne "new" and
275             addmsg(qq@error: bind "mint" requires id to be @
276             . qq@given as "new".@),
277             return(undef);
278             ! ($id = $oid = mint($noid, $contact, $peppermint)) and
279             return(undef);
280             }
281             $how eq "delete" || $how eq "purge" and
282             (defined($value) && $value eq "" and
283             addmsg($noid, qq@error: why does "bind $how" @
284             . "have a supplied value ($value)?"),
285             return(undef)),
286             $value = "",
287             1
288             or
289             ! defined($value) and
290             addmsg($noid,
291             qq@error: "bind $how $elem" requires a value to bind.@),
292             return(undef)
293             ;
294             # If we get here, $value is defined and we can use with impunity.
295              
296             dblock();
297             if (! defined($$noid{"$id\t$elem"})) { # currently unbound
298             grep(/^$how$/, qw( replace append prepend delete )) == 1 and
299             addmsg($noid, qq@error: for "bind $how", "$oid $oelem" @
300             . "must already be bound."),
301             dbunlock(),
302             return(undef);
303             $$noid{"$id\t$elem"} = ""; # can concatenate with impunity
304             }
305             else { # currently bound
306             grep(/^$how$/, qw( new mint peppermint )) == 1 and
307             addmsg($noid, qq@error: for "bind $how", "$oid $oelem" @
308             . " cannot already be bound."),
309             dbunlock(),
310             return(undef);
311             }
312             # We don't care about bound/unbound for: set, add, insert, purge
313              
314             my $oldlen = length($$noid{"$id\t$elem"});
315             my $newlen = length($value);
316             my $statmsg = "$newlen bytes written";
317              
318             $how eq "delete" || $how eq "purge" and
319             delete($$noid{"$id\t$elem"}),
320             $statmsg = "$oldlen bytes removed"
321             or
322             $how eq "add" || $how eq "append" and
323             $$noid{"$id\t$elem"} .= $value,
324             $statmsg .= " to the end of $oldlen bytes",
325             or
326             $how eq "insert" || $how eq "prepend" and
327             $$noid{"$id\t$elem"} = $value . $$noid{"$id\t$elem"},
328             $statmsg .= " to the beginning of $oldlen bytes",
329             or
330             $$noid{"$id\t$elem"} = $value,
331             $statmsg .= ", replacing $oldlen bytes",
332             ;
333             $hold and exists($$noid{"$id\t$elem"}) and ! hold_set($noid, $id) and
334             $hold = -1; # don't just bail out -- we need to unlock
335              
336             # yyy $contact info ? mainly for "long" term identifiers?
337             dbunlock();
338              
339             return(
340             # yyy should this $id be or not be $oid???
341             # yyy should labels for Id and Element be lowercased???
342             "Id: $id
343             Element: $elem
344             Bind: $how
345             Status: " . ($hold == -1 ? errmsg($noid) : "ok, $statmsg") . "\n");
346             }
347              
348             # Primes:
349             # 2 3 5 7
350             # 11 13 17 19
351             # 23 29 31 37
352             # 41 43 47 53
353             # 59 61 67 71
354             # 73 79 83 89
355             # 97 101 103 107
356             # 109 113 127 131
357             # 137 139 149 151
358             # 157 163 167 173
359             # 179 181 191 193
360             # 197 199 211 223
361             # 227 229 233 239
362             # 241 251 257 263
363             # 269 271 277 281
364             # 283 293 307 311
365             # 313 317 331 337
366             # 347 349 353 359
367             # 367 373 379 383
368             # 389 397 401 409
369             # 419 421 431 433
370             # 439 443 449 457
371             # 461 463 467 479
372             # 487 491 499 503 ...
373              
374             # yyy other character subsets? eg, 0-9, a-z, and _ (37 chars, with 37 prime)
375             # this could be mask character 'w' ?
376             # yyy there are 94 printable ASCII characters, with nearest lower prime = 89
377             # a radix of 89 would result in a huge, compact space with check chars
378             # mask character 'c' ?
379              
380             # Extended digits array. Maps ordinal value to ASCII character.
381             my @xdig = (
382             '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
383             'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'm', 'n',
384             'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'z'
385             );
386             # $legalstring should be 0123456789bcdfghjkmnpqrstvwxz
387             $legalstring = join('', @xdig);
388             $alphacount = scalar(@xdig); # extended digits count
389             $digitcount = 10; # pure digit count
390              
391             # Ordinal value hash for extended digits. Maps ASCII characters to ordinals.
392             my %ordxdig = (
393             '0' => 0, '1' => 1, '2' => 2, '3' => 3, '4' => 4,
394             '5' => 5, '6' => 6, '7' => 7, '8' => 8, '9' => 9,
395              
396             'b' => 10, 'c' => 11, 'd' => 12, 'f' => 13, 'g' => 14,
397             'h' => 15, 'j' => 16, 'k' => 17, 'm' => 18, 'n' => 19,
398              
399             'p' => 20, 'q' => 21, 'r' => 22, 's' => 23, 't' => 24,
400             'v' => 25, 'w' => 26, 'x' => 27, 'z' => 28
401             );
402              
403             # Compute check character for given identifier. If identifier ends in '+'
404             # (plus), replace it with a check character computed from the preceding chars,
405             # and return the modified identifier. If not, isolate the last char and
406             # compute a check character using the preceding chars; return the original
407             # identifier if the computed char matches the isolated char, or undef if not.
408              
409             # User explanation: check digits help systems to catch transcription
410             # errors that users might not be aware of upon retrieval; while users
411             # often have other knowledge with which to determine that the wrong
412             # retrieval occurred, this error is sometimes not readily apparent.
413             # Check digits reduce the chances of this kind of error.
414             # yyy ask Steve Silberstein (of III) about check digits?
415              
416             sub checkchar{ my( $id )=@_;
417             return undef
418             if (! $id );
419             my $lastchar = chop($id);
420             my $pos = 1;
421             my $sum = 0;
422             my $c;
423             for $c (split(//, $id)) {
424             # if character undefined, it's ordinal value is zero
425             $sum += $pos * (defined($ordxdig{"$c"}) ? $ordxdig{"$c"} : 0);
426             $pos++;
427             }
428             my $checkchar = $xdig[$sum % $alphacount];
429             #print "RADIX=$alphacount, mod=", $sum % $alphacount, "\n";
430             return $id . $checkchar
431             if ($lastchar eq "+" || $lastchar eq $checkchar);
432             return undef; # must be request to check, but failed match
433             # xxx test if check char changes on permutations
434             # XXX include test of length to make sure < than 29 (R) chars long
435             # yyy will this work for doi/handles?
436             }
437              
438             # Returns an array of cleared ids and byte counts if $verbose is set,
439             # otherwise returns an empty array. Set $verbose when we want to report what
440             # was cleared. Admin bindings aren't touched; they must be cleared manually.
441             #
442             # We always check for bindings before issuing, because even a previously
443             # unissued id may have been bound (unusual for many minter situations).
444             #
445             # Use dblock() before and dbunlock() after calling this routine.
446             #
447             sub clear_bindings { my( $noid, $id, $verbose )=@_;
448              
449             my @retvals;
450             my $db = $opendbtab{"bdb/$noid"};
451             my $cursor = $db->db_cursor();
452              
453             # yyy right now "$id\t" defines how we bind stuff to an id, but in the
454             # future that could change. in particular we don't bind (now)
455             # anything to just "$id" (without a tab after it)
456             my ($first, $skip, $done) = ("$id\t", 0, 0);
457             my ($key, $value) = ($first, 0);
458             my $status = $cursor->c_get($key, $value, DB_SET_RANGE);
459             $status == 0 and
460             $skip = ($key =~ m|^$first$R/|),
461             $done = ($key !~ m|^$first|),
462             1 or
463             $done = 1
464             ;
465             while (! $done) {
466             ! $skip and $verbose and
467             # if $verbose (ie, fetch), include label and
468             # remember to strip "Id\t" from front of $key
469             push(@retvals, ($key =~ /^[^\t]*\t(.*)/ ? $1 : $key)
470             . ": clearing " . length($value) . " bytes"),
471             delete($$noid{$key});
472             $status = $cursor->c_get($key, $value, DB_NEXT);
473             $status != 0 || $key !~ /^$first/ and
474             $done = 1 # no more elements under id
475             or
476             $skip = ($key =~ m|^$first$R/|)
477             ;
478             }
479             undef($cursor);
480             return($verbose ? @retvals : ());
481             }
482              
483             # Returns a short printable message on success, undef on error.
484             #
485             sub dbcreate { my( $dbdir, $contact, $template, $term,
486             $naan, $naa, $subnaa )=@_;
487              
488             my ($total, $noid);
489             my $dir = "$dbdir/NOID";
490             my $dbname = "$dir/noid.bdb";
491             # yyy try to use "die" to communicate to caller (graceful?)
492             # yyy how come tie doesn't complain if it exists already?
493              
494             -e $dbname and
495             addmsg(undef, "error: a NOID database already exists in "
496             . ($dbdir ne "." ? "\"$dbdir\"."
497             : "the current directory.") . "\n"
498             . "\tTo permit creation of a new minter, rename\n"
499             . "\tor remove the entire NOID subdirectory."),
500             return(undef);
501             ! -d $dir && ! mkdir($dir) and
502             addmsg(undef, "error: couldn't create database directory\n"
503             . "$dir: $!\n"),
504             return(undef);
505              
506             my ($prefix, $mask, $gen_type, $msg, $genonly);
507             ! defined($template) and
508             $genonly = 0,
509             $template = ".zd"
510             or
511             $genonly = 1, # not generated ids only
512             ;
513             $total = parse_template($template, $prefix, $mask, $gen_type, $msg);
514             ! $total and
515             addmsg($noid, $msg),
516             return(undef);
517             my $synonym = "noid" . ($genonly ? "_$msg" : "any");
518              
519             # Type check various parameters.
520             #
521             ! defined($contact) || $contact !~ /\S/ and
522             addmsg($noid, "error: contact ($contact) must be non-empty."),
523             return(undef);
524              
525             $term ||= "-";
526             $term ne "long" && $term ne "medium"
527             && $term ne "-" && $term ne "short" and
528             addmsg($noid, "error: term ($term) must be either "
529             . qq@"long", "medium", "-", or "short".@),
530             return(undef);
531              
532             ! defined($naa) and $naa = "";
533             ! defined($naan) and $naan = "";
534             ! defined($subnaa) and $subnaa = "";
535              
536             $term eq "long" &&
537             ($naan !~ /\S/ || $naa !~ /\S/ || $subnaa !~ /\S/) and
538             addmsg($noid, qq@error: longterm identifiers require @
539             . "an NAA Number, NAA, and SubNAA."),
540             return(undef);
541             # xxx should be able to check naa and naan live against registry
542             # yyy code should invite to apply for NAAN by email to ark@cdlib.org
543             # yyy ARK only? why not DOI/handle?
544             $term eq "long" && ($naan !~ /\d\d\d\d\d/) and
545             addmsg($noid, qq@error: term of "long" requires a @
546             . "5-digit NAAN (00000 if none), and non-empty "
547             . "string values for NAA and SubNAA."),
548             return(undef);
549              
550             # Create log and logbdb files from scratch and make them writable
551             # before calling dbopen().
552             #
553             ! storefile("$dir/log", "") || ! chmod(0666, "$dir/log") and
554             addmsg(undef, "Couldn't chmod log file: $!"),
555             return(undef);
556             ! storefile("$dir/logbdb", "") || ! chmod(0666, "$dir/logbdb") and
557             addmsg(undef, "Couldn't chmod logbdb file: $!"),
558             return(undef);
559             ! ($noid = dbopen($dbname, DB_CREATE)) and
560             addmsg(undef, "can't create database file: $!"),
561             return(undef);
562             logmsg($noid, ($template ?
563             qq@Creating database for template "$template".@
564             : "Creating database for bind-only minter."));
565              
566             # Database info
567             # yyy should be using db-> ops directly (for efficiency and?)
568             # so we can use DB_DUP flag
569             $$noid{"$R/naa"} = $naa;
570             $$noid{"$R/naan"} = $naan;
571             $$noid{"$R/subnaa"} = $subnaa || "";
572              
573             $$noid{"$R/longterm"} = ($term eq "long");
574             $$noid{"$R/wrap"} = ($term eq "short"); # yyy follow through
575              
576             $$noid{"$R/template"} = $template;
577             $$noid{"$R/prefix"} = $prefix;
578             $$noid{"$R/mask"} = $mask;
579             $$noid{"$R/firstpart"} = ($naan ? $naan . "/" : "") . $prefix;
580             $$noid{"$R/addcheckchar"} = ($mask =~ /k$/); # boolean answer
581              
582             $$noid{"$R/generator_type"} = $gen_type;
583             $$noid{"$R/genonly"} = $genonly;
584              
585             $$noid{"$R/total"} = $total;
586             $$noid{"$R/padwidth"} = ($total == NOLIMIT ? 16 : 2) + length($mask);
587             # yyy kludge -- padwidth of 16 enough for most lvf sorting
588              
589             # Some variables:
590             # oacounter overall counter's current value (last value minted)
591             # oatop overall counter's greatest possible value of counter
592             # held total with "hold" placed
593             # queued total currently in the queue
594             $$noid{"$R/oacounter"} = 0;
595             $$noid{"$R/oatop"} = $total;
596             $$noid{"$R/held"} = 0;
597             $$noid{"$R/queued"} = 0;
598              
599             $$noid{"$R/fseqnum"} = SEQNUM_MIN; # see queue() and mint()
600             $$noid{"$R/gseqnum"} = SEQNUM_MIN; # see queue()
601             $$noid{"$R/gseqnum_date"} = 0; # see queue()
602              
603             $$noid{"$R/version"} = $VERSION;
604              
605             # yyy should verify that a given NAAN and NAA are registered,
606             # and should offer to register them if not.... ?
607              
608             # Capture the properties of this minter.
609             #
610             # There are seven properties, represented by a string of seven
611             # capital letters or a hyphen if the property does not apply.
612             # The maximal string is GRANITE (we first had GRANT, then GARNET).
613             # We don't allow 'l' as an extended digit (good for minimizing
614             # visual transcriptions errors), but we don't get a chance to brag
615             # about that here.
616             #
617             # Note that on the Mohs mineral hardness scale from 1 - 10,
618             # the hardest is diamonds (which are forever), but granites
619             # (combinations of feldspar and quartz) are 5.5 to 7 in hardness.
620             # From http://geology.about.com/library/bl/blmohsscale.htm ; see also
621             # http://www.mineraltown.com/infocoleccionar/mohs_scale_of_hardness.htm
622             #
623             # These are far from perfect measures of identifier durability,
624             # and of course they are only from the assigner's point of view.
625             # For example, an alphabetical restriction doesn't guarantee
626             # opaqueness, but it indicates that semantics will be limited.
627             #
628             # yyy document that (I)mpressionable has to do with printing, does
629             # not apply to general URLs, but does apply to phone numbers and
630             # ISBNs and ISSNs
631             # yyy document that the opaqueness test is English-centric -- these
632             # measures work to some extent in English, but not in Welsh(?)
633             # or "l33t"
634             # yyy document that the properties are numerous enough to look for
635             # a compact acronym, that the choice of acronym is sort of
636             # arbitrary, so (GRANITE) was chosen since it's easy to remember
637             #
638             # $pre and $msk are in service of the letter "A" below.
639             (my $pre = $prefix) =~ s/[a-z]/e/ig;
640             (my $msk = $mask) =~ s/k/e/g;
641             $msk =~ s/^ze/zeeee/; # initial 'e' can become many later on
642              
643             my $properties =
644             ($naan ne "" && $naan ne "00000" ? "G" : "-")
645             . ($gen_type eq "random" ? "R" : "-")
646             # yyy substr is supposed to cut off first char
647             . ($genonly && ($pre . substr($msk, 1)) !~ /eee/ ? "A" : "-")
648             . ($term eq "long" ? "N" : "-")
649             . ($genonly && $prefix !~ /-/ ? "I" : "-")
650             . ($$noid{"$R/addcheckchar"} ? "T" : "-")
651             # yyy "E" mask test anticipates future extensions to alphabets
652             . ($genonly && ($prefix =~ /[aeiouy]/i || $mask =~ /[^rszdek]/)
653             ? "-" : "E") # Elided vowels or not
654             ;
655             $$noid{"$R/properties"} = $properties;
656              
657             # Now figure out "where" element.
658             #
659             use Sys::Hostname;
660             my $host = hostname();
661              
662             # my $child_process_id;
663             # unless (defined($child_process_id = open(CHILD, "-|"))) {
664             # die "unable to start child process, $!, stopped";
665             # }
666             # if ($child_process_id == 0) {
667             # # We are in the child. Set the PATH environment variable.
668             # $ENV{"PATH"} = "/bin:/usr/bin";
669             # # Run the command we want, with its STDOUT redirected
670             # # to the pipe that goes back to the parent.
671             # exec "/bin/hostname";
672             # die "unable to execute \"/bin/hostname\", $!, stopped";
673             # }
674             # else {
675             # # We are in the parent, and the CHILD file handle is
676             # # the read end of the pipe that has its write end as
677             # # STDOUT of the child.
678             # $host = ;
679             # close(CHILD);
680             # chomp $host;
681             # }
682              
683             my $cwd = $dbdir; # by default, assuming $dbdir is absolute path
684             if ($dbdir !~ m|^/|) {
685             $cwd = $ENV{"PWD"} . "/$dbdir";
686             }
687              
688             # Adjust some empty values for short-term display purposes.
689             #
690             $naa ||= "no Name Assigning Authority";
691             $subnaa ||= "no sub authority";
692             $naan ||= "no NAA Number";
693              
694             # Create a human- and machine-readable report.
695             #
696             my @p = split(//, $properties); # split into letters
697             s/-/_ not/ || s/./_____/
698             for (@p);
699             my $random_sample; # undefined on purpose
700             $total == NOLIMIT and
701             $random_sample = int(rand(10)); # first sample less than 10
702             my $sample1 = sample($noid, $random_sample);
703             $total == NOLIMIT and
704             $random_sample = int(rand(100000)); # second sample bigger
705             my $sample2 = sample($noid, $random_sample);
706              
707             my $htotal = ($total == NOLIMIT ? "unlimited" : human_num($total));
708             my $what = ($total == NOLIMIT ? "unlimited" : $total)
709             . qq@ $gen_type identifiers of form $template
710             A Noid minting and binding database has been created that will bind
711             @
712             . ($genonly ? "" : "any identifier ") . "and mint "
713             . ($total == NOLIMIT ? qq@an unbounded number of identifiers
714             with the template "$template".@
715             : $htotal . qq@ identifiers with the template "$template".@)
716             . qq@
717             Sample identifiers would be "$sample1" and "$sample2".
718             Minting order is $gen_type.@;
719              
720             $$noid{"$R/erc"} =
721             qq@# Creation record for the identifier generator in NOID/noid.bdb.
722             #
723             erc:
724             who: $contact
725             what: $what
726             when: @ . temper() . qq@
727             where: $host:$cwd
728             Version: Noid $VERSION
729             Size: @ . ($total == NOLIMIT ? "unlimited" : $total) . qq@
730             Template: @ . (! $template ? "(:none)" : $template . qq@
731             A suggested parent directory for this template is "$synonym". Note:
732             separate minters need separate directories, and templates can suggest
733             short names; e.g., the template "xz.redek" suggests the parent directory
734             "noid_xz4" since identifiers are "xz" followed by 4 characters.@) . qq@
735             Policy: (:$properties)
736             This minter's durability summary is (maximum possible being "GRANITE")
737             "$properties", which breaks down, property by property, as follows.
738             ^^^^^^^
739             |||||||_$p[6] (E)lided of vowels to avoid creating words by accident
740             ||||||_$p[5] (T)ranscription safe due to a generated check character
741             |||||_$p[4] (I)mpression safe from ignorable typesetter-added hyphens
742             ||||_$p[3] (N)on-reassignable in life of Name Assigning Authority
743             |||_$p[2] (A)lphabetic-run-limited to pairs to avoid acronyms
744             ||_$p[1] (R)andomly sequenced to avoid series semantics
745             |_$p[0] (G)lobally unique within a registered namespace (currently
746             tests only ARK namespaces; apply for one at ark@
747             . '@' . qq@cdlib.org)
748             Authority: $naa | $subnaa
749             NAAN: $naan
750             @;
751             ! storefile("$dir/README", $$noid{"$R/erc"})
752             and return(undef);
753             # yyy useful for quick info on a minter from just doing 'ls NOID'??
754             # storefile("$dir/T=$prefix.$mask", "foo\n");
755              
756             my $report = qq@Created: minter for $what @
757             . qq@See $dir/README for details.\n@;
758              
759             ! $template and
760             dbclose($noid),
761             return($report);
762              
763             init_counters($noid);
764             dbclose($noid);
765             return($report);
766             }
767              
768             # Report values according to $level. Values of $level:
769             # "brief" (default) user vals and interesting admin vals
770             # "full" user vals and all admin vals
771             # "dump" all vals, including all identifier bindings
772             #
773             sub dbinfo { my( $noid, $level )=@_;
774             my $db = $opendbtab{"bdb/$noid"};
775             my $cursor = $db->db_cursor();
776             my ($key, $value) = ("$R/", 0);
777             if ($level eq "dump") {
778             print "$key: $value\n"
779             while ($cursor->c_get($key, $value, DB_NEXT) == 0);
780             return 1;
781             }
782             my $status = $cursor->c_get($key, $value, DB_SET_RANGE);
783             if ($status) {
784             addmsg($noid, "c_get status/errno ($status/$!)");
785             return 0;
786             }
787             if ($key =~ m|^$R/$R/|) {
788             print "User Assigned Values\n";
789             print " $key: $value\n";
790             while ($cursor->c_get($key, $value, DB_NEXT) == 0) {
791             last
792             if ($key !~ m|^$R/$R/|);
793             print " $key: $value\n";
794             }
795             print "\n";
796             }
797             print "Admin Values\n";
798             print " $key: $value\n";
799             while ($cursor->c_get($key, $value, DB_NEXT) == 0) {
800             last
801             if ($key !~ m|^$R/|);
802             print " $key: $value\n"
803             if ($level eq "full" or
804             $key !~ m|^$R/c\d| &&
805             $key !~ m|^$R/saclist| &&
806             $key !~ m|^$R/recycle/|);
807             }
808             print "\n";
809             undef $cursor;
810             return 1;
811             }
812              
813             # yyy eventually we would like to do fancy fine-grained locking with
814             # BerkeleyDB features. For now, lock before tie(), unlock after untie().
815             sub dblock{ return 1; # placeholder
816             }
817             sub dbunlock{ return 1; # placeholder
818             }
819              
820             # returns noid: a listref
821             # $flags can be DB_RDONLY, DB_CREATE, or 0 (for read/write, the default)
822             #
823             sub dbopen { my( $dbname, $flags )=@_;
824              
825             # yyy to test: can we now open more than one noid at once?
826              
827             my ($env, $envhome);
828             ($envhome = $dbname) =~ s|[^/]+$||; # path ending in "NOID/"
829             ! -d $envhome and
830             addmsg(undef, "$envhome not a directory"),
831             return undef;
832             # yyy probably these envflags are overkill right now
833             my $envflags = DB_INIT_LOCK | DB_INIT_TXN | DB_INIT_MPOOL;
834             #my $envflags = DB_INIT_CDB | DB_INIT_MPOOL;
835             ($flags & DB_CREATE) and
836             $envflags |= DB_CREATE;
837             my @envargs = (
838             -Home => $envhome,
839             -Flags => $envflags,
840             -Verbose => 1
841             );
842              
843             # If it exists and is writable, use log file to inscribe BDB errors.
844             #
845             my ($logfile, $logfhandle, $log_opened, $logbdb);
846              
847             $logfile = $envhome . "log";
848             $log_opened = open($logfhandle, ">>$logfile");
849             $logbdb = $envhome . "logbdb";
850             -w $logbdb and
851             push(@envargs, ( -ErrFile => $logbdb ));
852             # yyy should we complain if can't open log file?
853              
854             $env = new BerkeleyDB::Env @envargs;
855             ! defined($env) and
856             addmsg(undef, "no \"Env\" object ($BerkeleyDB::Error)"),
857             return undef;
858              
859             #=for deleting
860             #
861             # print "OK so far\n"; exit(0);
862             # if ($flags && DB_CREATE) {
863             # # initialize environment files
864             # print "envhome=$envhome\n";
865             # $env = new BerkeleyDB::Env @envargs;
866             # ! defined($env) and
867             # addmsg(undef,
868             # "no \"Env\" object ($BerkeleyDB::Error)"),
869             # return undef;
870             # }
871             # else {
872             # print "flags=$flags\n";
873             # }
874             # print "OK so far\n"; exit(0);
875             # $env = new BerkeleyDB::Env @envargs;
876             # unless (defined($env)) {
877             # die "unable to get a \"BerkeleyDB::Env\" object ($BerkeleyDB::Error), stopped";
878             # }
879             #
880             #=cut
881              
882             my $noid = {}; # eventual minter database handle
883              
884             # For now we use simple database-level file locking with a timeout.
885             # Unlocking is implicit when the NOIDLOCK file handle is closed
886             # either explicitly or upon process termination.
887             #
888             my $lockfile = $envhome . "lock";
889             my $timeout = 5; # max number of seconds to wait for lock
890             my $locktype = (($flags & DB_RDONLY) ? LOCK_SH : LOCK_EX);
891              
892             ! sysopen(NOIDLOCK, $lockfile, O_RDWR | O_CREAT) and
893             addmsg(undef, "cannot open \"$lockfile\": $!"),
894             return undef;
895             eval {
896            
897             local $SIG{ALRM} = sub { die("lock timeout after $timeout "
898             . "seconds; consider removing \"$lockfile\"\n")
899             };
900             alarm $timeout; # alarm goes off in $timeout seconds
901             eval { # yyy if system has no flock, say in dbcreate profile?
902             flock(NOIDLOCK, $locktype) # blocking lock
903             or die("cannot flock: $!");
904             };
905             alarm 0; # cancel the alarm
906             die $@ if $@; # re-raise the exception
907             };
908             alarm 0; # race condition protection
909             if ($@) { # re-raise the exception
910             addmsg(undef, "error: $@");
911             return undef;
912             }
913              
914             my $db = tie(%$noid, "BerkeleyDB::Btree",
915             -Filename => "noid.bdb", # env has path to it
916             -Flags => $flags,
917             ## yyy -Property => DB_DUP,
918             -Env => $env)
919             or addmsg(undef, "tie failed on $dbname: $BerkeleyDB::Error")
920             and return undef;
921             # yyy how to set error code or return string?
922             # or die("Can't open database file: $!\n");
923             #print "dbopen: returning hashref=$noid, db=$db\n";
924             $opendbtab{"bdb/$noid"} = $db;
925             $opendbtab{"msg/$noid"} = "";
926             $opendbtab{"log/$noid"} = ($log_opened ? $logfhandle : undef);
927              
928             $locktest and
929             print("locktest: holding lock for $locktest seconds...\n"),
930             sleep($locktest);
931              
932             return $noid;
933             }
934              
935             # Call with number of seconds to sleep at end of each open.
936             # This exists only for the purpose of testing the locking mechanism.
937             #
938             sub locktest { my( $sleepvalue )=@_;
939             $locktest = $sleepvalue; # set global variable for locktest
940             return 1;
941             }
942              
943             sub dbclose { my( $noid )=@_;
944             undef $opendbtab{"msg/$noid"};
945             defined($opendbtab{"log/$noid"}) and
946             close($opendbtab{"log/$noid"});
947             undef $opendbtab{"bdb/$noid"};
948             untie %$noid;
949             close NOIDLOCK; # let go of lock
950             }
951              
952             # yyy is this needed? in present form?
953             #
954             # get next value and, if no error, change the 2nd and 3rd parameters and
955             # return 1, else return 0. To start at the beginning, the 2nd parameter,
956             # key (key), should be set to zero by caller, who might do this:
957             # $key = 0; while (each($noid, $key, $value)) { ... }
958             # The 3rd parameter will contain the corresponding value.
959              
960             sub eachnoid { my( $noid, $key, $value )=@_;
961             # yyy check that $db is tied? this is assumed for now
962             # yyy need to get next non-admin key/value pair
963             my $db = $opendbtab{"bdb/$noid"};
964             #was: my $flag = ($key ? R_NEXT : R_FIRST);
965             # fix from Jim Fullton:
966             my $flag = ($key ? DB_NEXT : DB_FIRST);
967             my $cursor = $db->db_cursor();
968             if ($cursor->c_get($key, $value, $flag)) {
969             return 0;
970             }
971             $_[1] = $key;
972             $_[2] = $value;
973             return 1;
974             }
975              
976             # A no-op function to call instead of checkchar().
977             #
978             sub echo {
979             return $_[0];
980             }
981              
982             # $verbose is 1 if we want labels, 0 if we don't
983             # yyy do we need to be able to "get/fetch" with a discriminant,
984             # eg, for smart multiple resolution??
985             sub fetch { my( $noid, $verbose, $id, @elems )=@_;
986              
987             ! defined($id) and
988             addmsg($noid, "error: " . ($verbose ? "fetch" : "get")
989             . " requires that an identifier be specified."),
990             return(undef);
991              
992             my ($hdr, $retval) = ("", "");
993             $verbose and $hdr = "id: $id"
994             . (exists($$noid{"$id\t$R/h"}) ? " hold" : "") . "\n"
995             . (validate($noid, "-", $id) ? "" : errmsg($noid) . "\n")
996             . "Circ: " . ($$noid{"$id\t$R/c"}
997             ? $$noid{"$id\t$R/c"} : "uncirculated") . "\n";
998              
999             my $db = $opendbtab{"bdb/$noid"};
1000             my $cursor = $db->db_cursor();
1001              
1002             if ($#elems < 0) { # No elements were specified, so find them.
1003             my ($first, $skip, $done) = ("$id\t", 0, 0);
1004             my ($key, $value) = ($first, 0);
1005             my $status = $cursor->c_get($key, $value, DB_SET_RANGE);
1006             $status == 0 and
1007             $skip = ($key =~ m|^$first$R/|),
1008             $done = ($key !~ m|^$first|),
1009             1 or
1010             $done = 1
1011             ;
1012             while (! $done) {
1013             ! $skip and
1014             # if $verbose (ie, fetch), include label and
1015             # remember to strip "Id\t" from front of $key
1016             $retval .= ($verbose ?
1017             ($key =~ /^[^\t]*\t(.*)/ ? $1 : $key)
1018             . ": " : "") . "$value\n";
1019             $status = $cursor->c_get($key, $value, DB_NEXT);
1020             $status != 0 || $key !~ /^$first/ and
1021             $done = 1 # no more elements under id
1022             or
1023             $skip = ($key =~ m|^$first$R/|)
1024             ;
1025             }
1026             undef($cursor);
1027             ! $retval and
1028             addmsg($noid, $hdr
1029             . "note: no elements bound under $id."),
1030             return(undef);
1031             return($hdr . $retval);
1032             }
1033             # yyy should this work for elem names with regexprs in them?
1034             # XXX idmap won't bind with longterm ???
1035             my $idmapped;
1036             for my $elem (@elems) {
1037             $$noid{"$id\t$elem"} and
1038             ($verbose and
1039             $retval .= "$elem: "),
1040             $retval .= $$noid{"$id\t$elem"} . "\n"
1041             or
1042             $idmapped = id2elemval($cursor, $verbose, $id, $elem),
1043             ($verbose and
1044             $retval .= ($idmapped ? "$idmapped\nnote: "
1045             . "previous result produced by :idmap\n"
1046             : qq@error: "$id $elem" is not bound.\n@)
1047             or
1048             $retval .= "$idmapped\n"
1049             )
1050             ;
1051             }
1052             undef($cursor);
1053             return($hdr . $retval);
1054             }
1055              
1056             # Generate the actual next id to give out. May be randomly or sequentially
1057             # selected. This routine should not be called if there are ripe recyclable
1058             # identifiers to use.
1059             #
1060             # This routine and n2xdig comprise the real heart of the minter software.
1061             #
1062             sub genid { my( $noid )=@_;
1063             dblock();
1064              
1065             # Variables:
1066             # oacounter overall counter's current value (last value minted)
1067             # oatop overall counter's greatest possible value of counter
1068             # saclist (sub) active counters list
1069             # siclist (sub) inactive counters list
1070             # c$n/value subcounter name's ($scn) value
1071              
1072             my $oacounter = $$noid{"$R/oacounter"};
1073              
1074             # yyy what are we going to do with counters for held? queued?
1075              
1076             if ($$noid{"$R/oatop"} != NOLIMIT && $oacounter >= $$noid{"$R/oatop"}) {
1077              
1078             # Critical test of whether we're willing to re-use identifiers
1079             # by re-setting (wrapping) the counter to zero. To be extra
1080             # careful we check both the longterm and wrap settings, even
1081             # though, in theory, wrap won't be set if longterm is set.
1082             #
1083             if ($$noid{"$R/longterm"} || ! $$noid{"$R/wrap"}) {
1084             dbunlock();
1085             my $m = "error: identifiers exhausted (stopped at "
1086             . $$noid{"$R/oatop"} . ").";
1087             addmsg($noid, $m);
1088             logmsg($noid, $m);
1089             return undef;
1090             }
1091             # If we get here, term is not "long".
1092             logmsg($noid, temper() . ": Resetting counter to zero; "
1093             . "previously issued identifiers will be re-issued");
1094             if ($$noid{"$R/generator_type"} eq "sequential") {
1095             $$noid{"$R/oacounter"} = 0;
1096             }
1097             else {
1098             init_counters($noid); # yyy calls dblock -- problem?
1099             }
1100             $oacounter = 0;
1101             }
1102             # If we get here, the counter may actually have just been reset.
1103              
1104             # Deal with the easy sequential generator case and exit early.
1105             #
1106             if ($$noid{"$R/generator_type"} eq "sequential") {
1107             my $id = &n2xdig($$noid{"$R/oacounter"}, $$noid{"$R/mask"});
1108             $$noid{"$R/oacounter"}++; # incr to reflect new total
1109             dbunlock();
1110             return $id;
1111             }
1112              
1113             # If we get here, the generator must be of type "random".
1114             #
1115             my $len = (my @saclist = split(/ /, $$noid{"$R/saclist"}));
1116             if ($len < 1) {
1117             dbunlock();
1118             addmsg($noid, "error: no active counters panic, "
1119             . "but $oacounter identifiers left?");
1120             return undef;
1121             }
1122             my $randn = int(rand($len)); # pick a specific counter name
1123             my $sctrn = $saclist[$randn]; # at random; then pull its $n
1124             my $n = substr($sctrn, 1); # numeric equivalent from the name
1125             #print "randn=$randn, sctrn=$sctrn, counter n=$n\t";
1126             my $sctr = $$noid{"$R/${sctrn}/value"}; # and get its value
1127             $sctr++; # increment and
1128             $$noid{"$R/${sctrn}/value"} = $sctr; # store new current value
1129             $$noid{"$R/oacounter"}++; # incr overall counter - some
1130             # redundancy for sanity's sake
1131              
1132             # deal with an exhausted subcounter
1133             if ($sctr >= $$noid{"$R/${sctrn}/top"}) {
1134             my ($c, $modsaclist) = ("", "");
1135             # remove from active counters list
1136             foreach $c (@saclist) { # drop $sctrn, but add it to
1137             next if ($c eq $sctrn); # inactive subcounters
1138             $modsaclist .= "$c ";
1139             }
1140             $$noid{"$R/saclist"} = $modsaclist; # update saclist
1141             $$noid{"$R/siclist"} .= " $sctrn"; # and siclist
1142             #print "===> Exhausted counter $sctrn\n";
1143             }
1144              
1145             # $sctr holds counter value, $n holds ordinal of the counter itself
1146             my $id = &n2xdig(
1147             $sctr + ($n * $$noid{"$R/percounter"}),
1148             $$noid{"$R/mask"});
1149             dbunlock();
1150             return $id;
1151             }
1152              
1153             # Identifier admin info is stored in three places:
1154             #
1155             # id\t:/h hold status: if exists = hold, else no hold
1156             # id\t:/c circulation record, if it exists, is
1157             # circ_status_history_vector|when|contact(who)|oacounter
1158             # where circ_status_history_vector is a string of [iqu]
1159             # and oacounter is current overall counter value, FWIW;
1160             # circ status goes first to make record easy to update
1161             # id\t:/p pepper
1162             #
1163             # Returns a single letter circulation status, which must be one
1164             # of 'i', 'q', or 'u'. Returns the empty string on error.
1165             #
1166             sub get_circ_svec { my( $noid, $id )=@_;
1167              
1168             my $circ_rec = $$noid{"$id\t$R/c"};
1169             ! defined($circ_rec) and
1170             return '';
1171              
1172             # Circulation status vector (string of letter codes) is the 1st
1173             # element, elements being separated by '|'. We don't care about
1174             # the other elements for now because we can find everything we
1175             # need at the beginning of the string (without splitting it).
1176             # Let errors hit the log file rather than bothering the caller.
1177             #
1178             my $circ_svec = (split(/\|/, $circ_rec))[0];
1179              
1180             ! defined($circ_svec) || $circ_svec eq "" and
1181             logmsg($noid, "error: id $id has no circ status vector -- "
1182             . "circ record is $circ_rec"),
1183             return '';
1184             $circ_svec !~ /^([iqu])[iqu]*$/ and
1185             logmsg($noid, "error: id $id has a circ status vector "
1186             . "containing letters other than 'i', "
1187             . "'q', or 'u' -- circ record is $circ_rec"),
1188             return '';
1189             return $1;
1190             }
1191              
1192             # As a last step of issuing or queuing an identifier, adjust the circulation
1193             # status record. We place a "hold" if we're both issuing an identifier and
1194             # the minter is for "long" term ids. If we're issuing, we also purge any
1195             # element bindings that exist; this means that a queued identifier's bindings
1196             # will by default last until it is re-minted.
1197             #
1198             # The caller must know what they're doing because we don't check parameters
1199             # for errors; this routine is not externally visible anyway. Returns the
1200             # input identifier on success, or undef on error.
1201             #
1202             sub set_circ_rec { my( $noid, $id, $circ_svec, $date, $contact )=@_;
1203              
1204             my $status = 1;
1205             my $circ_rec = "$circ_svec|$date|$contact|" . $$noid{"$R/oacounter"};
1206              
1207             # yyy do we care what the previous circ record was? since right now
1208             # we just clobber without looking at it
1209              
1210             dblock();
1211              
1212             # Check for and clear any bindings if we're issuing an identifier.
1213             # We ignore the return value from clear_bindings().
1214             # Replace or clear admin bindings by hand, including pepper if any.
1215             # yyy pepper not implemented yet
1216             # If issuing a longterm id, we automatically place a hold on it.
1217             #
1218             $circ_svec =~ /^i/ and
1219             clear_bindings($noid, $id, 0),
1220             delete($$noid{"$id\t$R/p"}),
1221             ($$noid{"$R/longterm"} and
1222             $status = hold_set($noid, $id)),
1223             ;
1224             $$noid{"$id\t$R/c"} = $circ_rec;
1225              
1226             dbunlock();
1227              
1228             # This next logmsg should account for the bulk of the log when
1229             # longterm identifiers are in effect.
1230             #
1231             $$noid{"$R/longterm"} and
1232             logmsg($noid, "m: $circ_rec"
1233             . ($status ? "" : " -- hold failed"));
1234              
1235             ! $status and # must be an error in hold_set()
1236             return(undef);
1237             return $id;
1238             }
1239              
1240             # Get the value of any named internal variable (prefaced by $R)
1241             # given an open database reference.
1242             #
1243             sub getnoid { my( $noid, $varname )=@_;
1244             return $$noid{"$R/$varname"};
1245             }
1246              
1247             #=for deleting
1248             ## Simple ancillary counter that we currently use to pair a sequence number
1249             ## with each minted identifier. However, these are independent actions.
1250             ## The direction parameter is negative, zero, or positive to count down,
1251             ## reset, or count up upon call. Returns the current counter value.
1252             ##
1253             ## (yyy should we make it do zero-padding on the left to a fixed width
1254             ## determined by number of digits in the total?)
1255             ##
1256             #sub count { my( $noid, $direction )=@_;
1257             #
1258             # $direction > 0
1259             # and return ++$$noid{"$R/seqnum"};
1260             # $direction < 0
1261             # and return --$$noid{"$R/seqnum"};
1262             # # $direction must == 0
1263             # return $$noid{"$R/seqnum"} = 0;
1264             #}
1265             #=cut
1266              
1267             # A hold may be placed on an identifier to keep it from being minted/issued.
1268             # Returns 1 on success, 0 on error. Sets errmsg() in either case.
1269             #
1270             sub hold { my( $noid, $contact, $on_off, @ids )=@_;
1271              
1272             # yyy what makes sense in this case?
1273             #! $$noid{"$R/template"} and
1274             # addmsg($noid,
1275             # "error: holding makes no sense in a bind-only minter."),
1276             # return(0);
1277             ! defined($contact) and
1278             addmsg($noid, "error: contact undefined"),
1279             return(0);
1280             ! defined($on_off) and
1281             addmsg($noid, qq@error: hold "set" or "release"?@),
1282             return(0);
1283             ! @ids and
1284             addmsg($noid, qq@error: no Id(s) specified@),
1285             return(0);
1286             $on_off ne "set" && $on_off ne "release" and
1287             addmsg($noid, "error: unrecognized hold directive ($on_off)"),
1288             return(0);
1289              
1290             my $release = $on_off eq "release";
1291             # yyy what is sensible thing to do if no ids are present?
1292             my $iderror = "";
1293             $$noid{"$R/genonly"} and
1294             ($iderror = validate($noid, "-", @ids)) !~ /error:/ and
1295             $iderror = "";
1296             $iderror and
1297             addmsg($noid, "error: hold operation not started -- one or "
1298             . "more ids did not validate:\n$iderror"),
1299             return(0);
1300             my $status;
1301             my $n = 0;
1302             for my $id (@ids) {
1303             if ($release) { # no hold means key doesn't exist
1304             logmsg($noid, temper() . " $id: releasing hold")
1305             if ($$noid{"$R/longterm"});
1306             dblock();
1307             $status = hold_release($noid, $id);
1308             }
1309             else { # "hold" means key exists
1310             logmsg($noid, temper() . " $id: placing hold")
1311             if ($$noid{"$R/longterm"});
1312             dblock();
1313             $status = hold_set($noid, $id);
1314             }
1315             dbunlock();
1316             ! $status and
1317             return(0);
1318             $n++; # xxx should report number
1319              
1320             # Incr/Decrement for each id rather than by scalar(@ids);
1321             # if something goes wrong in the loop, we won't be way off.
1322              
1323             # XXX should we refuse to hold if "long" and issued?
1324             # else we cannot use "hold" in the sense of either
1325             # "reserved for future use" or "reserved, never issued"
1326             #
1327             }
1328             addmsg($noid, "ok: $n hold" . ($n == 1 ? "" : "s") . " placed");
1329             return(1);
1330             }
1331              
1332             # Returns 1 on success, 0 on error. Use dblock() before and dbunlock()
1333             # after calling this routine.
1334             # yyy don't care if hold was in effect or not
1335             #
1336             sub hold_set { my( $noid, $id )=@_;
1337              
1338             $$noid{"$id\t$R/h"} = 1; # value doesn't matter
1339             $$noid{"$R/held"}++;
1340             if ($$noid{"$R/total"} != NOLIMIT # ie, if total is non-zero
1341             && $$noid{"$R/held"} > $$noid{"$R/oatop"}) {
1342             my $m = "error: hold count (" . $$noid{"$R/held"}
1343             . ") exceeding total possible on id $id";
1344             addmsg($noid, $m);
1345             logmsg($noid, $m);
1346             return(0);
1347             }
1348             return(1);
1349             }
1350              
1351             # Returns 1 on success, 0 on error. Use dblock() before and dbunlock()
1352             # after calling this routine.
1353             # yyy don't care if hold was in effect or not
1354             #
1355             sub hold_release { my( $noid, $id )=@_;
1356              
1357             delete($$noid{"$id\t$R/h"});
1358             $$noid{"$R/held"}--;
1359             if ($$noid{"$R/held"} < 0) {
1360             my $m = "error: hold count (" . $$noid{"$R/held"}
1361             . ") going negative on id $id";
1362             addmsg($noid, $m);
1363             logmsg($noid, $m);
1364             return(0);
1365             }
1366             return(1);
1367             }
1368              
1369             # Return printable form of an integer after adding commas to separate
1370             # groups of 3 digits.
1371             #
1372             sub human_num { my( $num )=@_;
1373              
1374             $num ||= 0;
1375             my $numstr = sprintf("%u", $num);
1376             if ($numstr =~ /^\d\d\d\d+$/) { # if num is 4 or more digits
1377             $numstr .= ","; # prepare to add commas
1378             while ($numstr =~ s/(\d)(\d\d\d,)/$1,$2/) {};
1379             chop($numstr);
1380             }
1381             return $numstr;
1382             }
1383              
1384             # Return $elem: $val or error string.
1385             #
1386             sub id2elemval { my( $cursor, $verbose, $id, $elem )=@_;
1387              
1388             my $first = "$R/idmap/$elem\t";
1389             my ($key, $value) = ($first, 0);
1390             my $status = $cursor->c_get($key, $value, DB_SET_RANGE);
1391             $status and
1392             return "error: id2elemval: c_get status/errno ($status/$!)";
1393             $key !~ /^$first/ and
1394             return "";
1395             my ($pattern, $newval);
1396             while (1) { # exhaustively visit all patterns for this element
1397             ($pattern) = ($key =~ m|$first(.+)|);
1398             $newval = $id;
1399             defined($pattern) and
1400             # yyy kludgy use of unlikely delimiters
1401             (eval '$newval =~ ' . qq@s$pattern$value@ and
1402             # replaced, so return
1403             return ($verbose ? "$elem: " : "") . $newval),
1404             ($@ and
1405             return "error: id2elemval eval: $@")
1406             ;
1407             $cursor->c_get($key, $value, DB_NEXT) != 0 and
1408             return "";
1409             $key !~ /^$first/ and # no match and ran out of rules
1410             return "";
1411             }
1412             }
1413              
1414             # Initialize counters.
1415             #
1416             sub init_counters { my( $noid )=@_;
1417              
1418             # Variables:
1419             # oacounter overall counter's current value (last value minted)
1420             # saclist (sub) active counters list
1421             # siclist (sub) inactive counters list
1422             # c$n/value subcounter name's ($n) value
1423             # c$n/top subcounter name's greatest possible value
1424              
1425             dblock();
1426              
1427             $$noid{"$R/oacounter"} = 0;
1428             my $total = $$noid{"$R/total"};
1429              
1430             my $maxcounters = 293; # prime, a little more than 29*10
1431             #
1432             # Using a prime under the theory (unverified) that it may help even
1433             # out distribution across the more significant digits of generated
1434             # identifiers. In this way, for example, a method for mapping an
1435             # identifier to a pathname (eg, fk9tmb35x -> fk/9t/mb/35/x/, which
1436             # could be a directory holding all files related to the named
1437             # object), would result in a reasonably balanced filesystem tree
1438             # -- no subdirectories too unevenly loaded. That's the hope anyway.
1439              
1440             $$noid{"$R/percounter"} = # max per counter, last has fewer
1441             int($total / $maxcounters + 1); # round up to be > 0
1442              
1443             my $n = 0;
1444             my $t = $total;
1445             my $pctr = $$noid{"$R/percounter"};
1446             my $saclist = "";
1447             while ($t > 0) {
1448             $$noid{"$R/c${n}/top"} = ($t >= $pctr ? $pctr : $t);
1449             $$noid{"$R/c${n}/value"} = 0; # yyy or 1?
1450             $saclist .= "c$n ";
1451             $t -= $pctr;
1452             $n++;
1453             }
1454             $$noid{"$R/saclist"} = $saclist;
1455             $$noid{"$R/siclist"} = "";
1456             $n--;
1457              
1458             dbunlock();
1459              
1460             #print "saclist: $$noid{"$R/saclist"}\nfinal top: "
1461             # . $$noid{"$R/c${n}/top"} . "\npercounter=$pctr\n";
1462             #foreach $c ($$saclist) {
1463             # print "$c, ";
1464             #}
1465             #print "\n";
1466             }
1467              
1468             # This routine produces a new identifier by taking a previously recycled
1469             # identifier from a queue (usually, a "used" identifier, but it might
1470             # have been pre-recycled) or by generating a brand new one.
1471             #
1472             # The $contact should be the initials or descriptive string to help
1473             # track who or what was happening at time of minting.
1474             #
1475             # Returns undef on error.
1476             #
1477             sub mint { my( $noid, $contact, $pepper )=@_;
1478              
1479             ! defined($contact) and
1480             addmsg($noid, "contact undefined"),
1481             return undef;
1482              
1483             ! $$noid{"$R/template"} and
1484             addmsg($noid, "error: this minter does not generate "
1485             . "identifiers (it does accept user-defined "
1486             . "identifier and element bindings)."),
1487             return undef;
1488             # Check if the head of the queue is ripe. See comments under queue()
1489             # for an explanation of how the queue works.
1490             #
1491             my $currdate = temper(); # fyi, 14 digits long
1492             my $first = "$R/q/";
1493             my $db = $opendbtab{"bdb/$noid"};
1494             ! (my $cursor = $db->db_cursor()) and
1495             addmsg($noid, "couldn't create cursor"),
1496             return undef;
1497              
1498             # The following is not a proper loop. Normally it should run once,
1499             # but several cycles may be needed to weed out anomalies with the id
1500             # at the head of the queue. If all goes well and we found something
1501             # to mint from the queue, the last line in the loop exits the routine.
1502             # If we drop out of the loop, it's because the queue wasn't ripe.
1503             #
1504             my ($id, $status, $key, $qdate, $circ_svec);
1505             while (1) {
1506             $key = $first;
1507             $status = $cursor->c_get($key, $id, DB_SET_RANGE);
1508             $status and
1509             addmsg($noid, "mint: c_get status/errno ($status/$!)"),
1510             return undef;
1511             # The cursor, key and value are now set at the first item
1512             # whose key is greater than or equal to $first. If the
1513             # queue was empty, there should be no items under "$R/q/".
1514             #
1515             ($qdate) = ($key =~ m|$R/q/(\d{14})|);
1516             ! defined($qdate) and # nothing in queue
1517             # this is our chance -- see queue() comments for why
1518             ($$noid{"$R/fseqnum"} > SEQNUM_MIN and
1519             $$noid{"$R/fseqnum"} = SEQNUM_MIN),
1520             last; # so move on
1521             # If the date of the earliest item to re-use hasn't arrived
1522             $currdate < $qdate and
1523             last; # move on
1524              
1525             # If we get here, head of queue is ripe. Remove from queue.
1526             # Any "next" statement from now on in this loop discards the
1527             # queue element.
1528             #
1529             $db->db_del($key);
1530             if ($$noid{"$R/queued"}-- <= 0) {
1531             my $m = "error: queued count (" . $$noid{"$R/queued"}
1532             . ") going negative on id $id";
1533             addmsg($noid, $m);
1534             logmsg($noid, $m);
1535             return(undef);
1536             }
1537              
1538             # We perform a few checks first to see if we're actually
1539             # going to use this identifier. First, if there's a hold,
1540             # remove it from the queue and check the queue again.
1541             #
1542             exists($$noid{"$id\t$R/h"}) and # if there's a hold
1543             $$noid{"$R/longterm"} && logmsg($noid, "warning: id "
1544             . "$id found in queue with a hold placed on "
1545             . "it -- removed from queue."),
1546             next;
1547             # yyy this means id on "hold" can still have a 'q' circ status?
1548              
1549             $circ_svec = get_circ_svec($noid, $id);
1550              
1551             $circ_svec =~ /^i/ and
1552             logmsg($noid, "error: id $id appears to have been "
1553             . "issued while still in the queue -- "
1554             . "circ record is " . $$noid{"$id\t$R/c"}),
1555             next
1556             ;
1557             $circ_svec =~ /^u/ and
1558             logmsg($noid, "note: id $id, marked as unqueued, is "
1559             . "now being removed/skipped in the queue -- "
1560             . "circ record is " . $$noid{"$id\t$R/c"}),
1561             next
1562             ;
1563             $circ_svec =~ /^([^q])/ and
1564             logmsg($noid, "error: id $id found in queue has an "
1565             . "unknown circ status ($1) -- "
1566             . "circ record is " . $$noid{"$id\t$R/c"}),
1567             next
1568             ;
1569              
1570             # Finally, if there's no circulation record, it means that
1571             # it was queued to get it minted earlier or later than it
1572             # would normally be minted. Log if term is "long".
1573             #
1574             $circ_svec eq "" and
1575             ($$noid{"$R/longterm"} && logmsg($noid, "note: "
1576             . "queued id $id coming out of queue on first "
1577             . "minting (pre-cycled)"))
1578             ;
1579              
1580             # If we get here, our identifier has now passed its tests.
1581             # Do final identifier signoff and return.
1582             #
1583             return(set_circ_rec($noid,
1584             $id, 'i' . $circ_svec, $currdate, $contact));
1585             }
1586              
1587             # If we get here, we're not getting an id from the queue.
1588             # Instead we have to generate one.
1589             #
1590             # As above, the following is not a proper loop. Normally it should
1591             # run once, but several cycles may be needed to weed out anomalies
1592             # with the generated id (eg, there's a hold on the id, or it was
1593             # queued to delay issue).
1594             #
1595             while (1) {
1596              
1597             # Next is the important seeding of random number generator.
1598             # We need this so that we get the same exact series of
1599             # pseudo-random numbers, just in case we have to wipe out a
1600             # generator and start over. That way, the n-th identifier
1601             # will be the same, no matter how often we have to start
1602             # over. This step has no effect when $generator_type ==
1603             # "sequential".
1604             #
1605             srand($$noid{"$R/oacounter"});
1606              
1607             # The id returned in this next step may have a "+" character
1608             # that n2xdig() appended to it. The checkchar() routine
1609             # will convert it to a check character.
1610             #
1611             $id = genid($noid);
1612             ! defined($id)
1613             and return undef;
1614              
1615             # Prepend NAAN and separator if there is a NAAN.
1616             #
1617             $$noid{"$R/firstpart"} and
1618             $id = $$noid{"$R/firstpart"} . $id;
1619              
1620             # Add check character if called for.
1621             #
1622             $$noid{"$R/addcheckchar"} and
1623             $id = &checkchar($id);
1624              
1625             # There may be a hold on an id, meaning that it is not to
1626             # be issued (or re-issued).
1627             #
1628             exists($$noid{"$id\t$R/h"}) and # if there's a hold
1629             next; # do genid() again
1630              
1631             # It's usual to find no circulation record. However,
1632             # there may be a circulation record if the generator term
1633             # is not "long" and we've wrapped (restarted) the counter,
1634             # of if it was queued before first minting. If the term
1635             # is "long", the generated id automatically gets a hold.
1636             #
1637             $circ_svec = get_circ_svec($noid, $id);
1638              
1639             # A little unusual is the case when something has a
1640             # circulation status of 'q', meaning it has been queued
1641             # before first issue, presumably to get it minted earlier or
1642             # later than it would normally be minted; if the id we just
1643             # generated is marked as being in the queue (clearly not at
1644             # the head of the queue, or we would have seen it in the
1645             # previous while loop), we go to generate another id. If
1646             # term is "long", log that we skipped this one.
1647             #
1648             $circ_svec =~ /^q/ and
1649             ($$noid{"$R/longterm"} && logmsg($noid,
1650             "note: will not issue genid()'d $id as it's "
1651             . "status is 'q', circ_rec is "
1652             . $$noid{"$id\t$R/c"})),
1653             next
1654             ;
1655              
1656             # If the circulation status is 'i' it means that the id is
1657             # being re-issued. This shouldn't happen unless the counter
1658             # has wrapped around to the beginning. If term is "long",
1659             # an id can be re-issued only if (a) its hold was released
1660             # and (b) it was placed in the queue (thus marked with 'q').
1661             #
1662             $circ_svec =~ /^i/ && ($$noid{"$R/longterm"}
1663             || ! $$noid{"$R/wrap"}) and
1664             logmsg($noid, "error: id $id cannot be "
1665             . "re-issued except by going through the "
1666             . "queue, circ_rec " . $$noid{"$id\t$R/c"}),
1667             next
1668             ;
1669             $circ_svec =~ /^u/ and
1670             logmsg($noid, "note: generating id $id, currently "
1671             . "marked as unqueued, circ record is "
1672             . $$noid{"$id\t$R/c"}),
1673             next
1674             ;
1675             $circ_svec =~ /^([^iqu])/ and
1676             logmsg($noid, "error: id $id has unknown circulation "
1677             . "status ($1), circ_rec "
1678             . $$noid{"$id\t$R/c"}),
1679             next
1680             ;
1681             #
1682             # Note that it's OK/normal if $circ_svec was an empty string.
1683              
1684             # If we get here, our identifier has now passed its tests.
1685             # Do final identifier signoff and return.
1686             #
1687             return(set_circ_rec($noid,
1688             $id, 'i' . $circ_svec, $currdate, $contact));
1689             }
1690             # yyy
1691             # Note that we don't assign any value to the very important key=$id.
1692             # What should it be bound to? Let's decide later.
1693              
1694             # yyy
1695             # Often we want to bind an id initially even if the object or record
1696             # it identifies is "in progress", as this gives way to begin tracking,
1697             # eg, back to the person responsible.
1698             #
1699             }
1700              
1701             # Record user (":/:/...") values in admin area.
1702             sub note { my( $noid, $contact, $key, $value )=@_;
1703             my $db = $opendbtab{"bdb/$noid"};
1704             dblock();
1705             my $status = $db->db_put("$R/$R/$key", $value);
1706             dbunlock();
1707             $$noid{"$R/longterm"} and
1708             logmsg($noid, "note: note attempt under $key by $contact"
1709             . ($status ? "" : " -- note failed"));
1710             if ($status) {
1711             addmsg($noid, "db->db_put status/errno ($status/$!)");
1712             return 0;
1713             }
1714             return 1;
1715             }
1716              
1717             # Convert a number to an extended digit according to $mask and $generator_type
1718             # and return (without prefix or NAAN). A $mask character of 'k' gets
1719             # converted to '+' in the returned string; post-processing will eventually
1720             # turn it into a computed check character.
1721             #
1722             sub n2xdig { my( $num, $mask )=@_;
1723             my $s = '';
1724             my ($div, $remainder, $c);
1725              
1726             # Confirm well-formedness of $mask before proceeding.
1727             #
1728             $mask !~ /^[rsz][de]+k?$/
1729             and return undef;
1730              
1731             my $varwidth = 0; # we start in fixed width part of the mask
1732             my @rmask = reverse(split(//, $mask)); # process each char in reverse
1733             while ($num != 0 || ! $varwidth) {
1734             if (! $varwidth) {
1735             $c = shift @rmask; # check next mask character,
1736             ! defined($c)
1737             || $c =~ /[rs]/ # terminate on r or s even if
1738             and last; # $num is not all used up yet
1739             $c =~ /e/ and
1740             $div = $alphacount
1741             or
1742             $c =~ /d/ and
1743             $div = $digitcount
1744             or
1745             $c =~ /z/ and
1746             $varwidth = 1 # re-uses last $div value
1747             and next
1748             or
1749             $c =~ /k/ and
1750             next
1751             ;
1752             #=for later
1753             ## why is this slower? should be faster since it does NOT use regexprs
1754             # ! defined($c) || # terminate on r or s even if
1755             # $c eq 'r' || $c eq 's'
1756             # and last; # $num is not all used up yet
1757             # $c eq 'e' and
1758             # $div = $alphacount
1759             # or
1760             # $c eq 'd' and
1761             # $div = $digitcount
1762             # or
1763             # $c eq 'z' and
1764             # $varwidth = 1 # re-uses last $div value
1765             # and next
1766             # or
1767             # $c eq 'k' and
1768             # next
1769             # ;
1770             #=cut
1771             }
1772             $remainder = $num % $div;
1773             $num = int($num / $div);
1774             $s = $xdig[$remainder] . $s;
1775             }
1776             $mask =~ /k$/ and # if it ends in a check character
1777             $s .= "+"; # represent it with plus in new id
1778             return $s;
1779             }
1780              
1781             # yyy templates should probably have names, eg, jk##.. could be jk4
1782             # or jk22, as in "./noid testdb/jk4 ... "
1783              
1784             # Reads template looking for errors and returns the total number of
1785             # identifiers that it is capable of generating, using NOLIMIT to mean
1786             # indefinite (unbounded). Returns 0 on error. Variables $prefix,
1787             # $mask, and $generator_type are output parameters.
1788             #
1789             # $message will always be set; 0 return with error, 1 return with synonym
1790              
1791             #
1792             sub parse_template { my( $template, $prefix, $mask, $gen_type, $message )=@_;
1793              
1794             my $dirname;
1795             my $msg = \$_[4]; # so we can modify $message argument easily
1796             $$msg = "";
1797              
1798             # Strip final spaces and slashes. If there's a pathname,
1799             # save directory and final component separately.
1800             #
1801             $template ||= "";
1802             $template =~ s|[/\s]+$||; # strip final spaces or slashes
1803             ($dirname, $template) = $template =~ m|^(.*/)?([^/]+)$|;
1804             $dirname ||= ""; # make sure $dirname is defined
1805              
1806             ! $template || $template eq "-" and
1807             $$msg = "parse_template: no minting possible.",
1808             $_[1] = $_[2] = $_[3] = "",
1809             return NOLIMIT;
1810             $template !~ /^([^\.]*)\.(\w+)/ and
1811             $$msg = "parse_template: no template mask - "
1812             . "can't generate identifiers.",
1813             return 0;
1814             ($prefix, $mask) = ($1 || "", $2);
1815              
1816             $mask !~ /^[rsz]/ and
1817             $$msg = "parse_template: mask must begin with one of "
1818             . "the letters\n'r' (random), 's' (sequential), "
1819             . "or 'z' (sequential unlimited).",
1820             return 0;
1821              
1822             $mask !~ /^.[^k]+k?$/ and
1823             $$msg = "parse_template: exactly one check character "
1824             . "(k) is allowed, and it may\nonly appear at the "
1825             . "end of a string of one or more mask characters.",
1826             return 0;
1827              
1828             $mask !~ /^.[de]+k?$/ and
1829             $$msg = "parse_template: a mask may contain only the "
1830             . "letters 'd' or 'e'.",
1831             return 0;
1832              
1833             # Check prefix for errors.
1834             #
1835             my $c;
1836             my $has_cc = ($mask =~ /k$/);
1837             for $c (split //, $prefix) {
1838             if ($has_cc && $c ne '/' && ! exists($ordxdig{$c})) {
1839             $$msg = "parse_template: with a check character "
1840             . "at the end, a mask may contain only "
1841             . qq@characters from "$legalstring".@;
1842             return 0;
1843             }
1844             }
1845              
1846             # If we get here, the mask is well-formed. Now try to come up with
1847             # a short synonym for the template; it should start with the
1848             # template's prefix and then an integer representing the number of
1849             # letters in identifiers generated by the template. For example,
1850             # a template of "ft.rddeek" would be "ft5".
1851             #
1852             my $masklen = length($mask) - 1; # subtract one for [rsz]
1853             $$msg = $prefix . $masklen;
1854             $mask =~ /^z/ and # "+" indicates length can grow
1855             $$msg .= "+";
1856              
1857             # r means random;
1858             # s means sequential, limited;
1859             # z means sequential, no limit, and repeat most significant mask
1860             # char as needed;
1861              
1862             my $total = 1;
1863             for $c (split //, $mask) {
1864             # Mask chars it could be are: d e k
1865             $c =~ /e/ and
1866             $total *= $alphacount
1867             or
1868             $c =~ /d/ and
1869             $total *= $digitcount
1870             or
1871             $c =~ /[krsz]/ and
1872             next
1873             ;
1874             }
1875              
1876             $_[1] = $prefix;
1877             $_[2] = $mask;
1878             $_[3] = $gen_type = ($mask =~ /^r/ ? "random" : "sequential");
1879             # $_[4] was set to the synonym already
1880             return ($mask =~ /^z/ ? NOLIMIT : $total);
1881             }
1882              
1883             # An identifier may be queued to be issued/minted. Usually this is used
1884             # to recycle a previously issued identifier, but it may also be used to
1885             # delay or advance the birth of an identifier that would normally be
1886             # issued in its own good time. The $when argument may be "first", "lvf",
1887             # "delete", or a number and a letter designating units of seconds ('s',
1888             # the default) or days ('d') which is a delay added to the current time;
1889             # a $when of "now" means use the current time with no delay.
1890              
1891             # The queue is composed of keys of the form $R/q/$qdate/$seqnum/$paddedid,
1892             # with the correponding values being the actual queued identifiers. The
1893             # Btree allows us to step sequentially through the queue in an ordering
1894             # that is a side-effect of our key structure. Left-to-right, it is
1895             #
1896             # :/q/ $R/q/, 4 characters wide
1897             # $qdate 14 digits wide, or 14 zeroes if "first" or "lvf"
1898             # $seqnum 6 digits wide, or 000000 if "lvf"
1899             # $paddedid id "value", zero-padded on left, for "lvf"
1900             #
1901             # The $seqnum is there to help ensure queue order for up to a million queue
1902             # requests in a second (the granularity of our clock). [ yyy $seqnum would
1903             # probably be obviated if we were using DB_DUP, but there's much conversion
1904             # involved with that ]
1905             #
1906             # We base our $seqnum (min is 1) on one of two stored sources: "fseqnum"
1907             # for queue "first" requests or "gseqnum" for queue with a real time stamp
1908             # ("now" or delayed). To implement queue "first", we use an artificial
1909             # time stamp of all zeroes, just like for "lvf"; to keep all "lvf" sorted
1910             # before "first" requests, we reset fseqnum and gseqnum to 1 (not zero).
1911             # We reset gseqnum whenever we use it at a different time from last time
1912             # since sort order will be guaranteed by different values of $qdate. We
1913             # don't have that guarantee with the all-zeroes time stamp and fseqnum,
1914             # so we put off resetting fseqnum until it is over 500,000 and the queue
1915             # is empty, so we do then when checking the queue in mint().
1916             #
1917             # This key structure should ensure that the queue is sorted first by date.
1918             # As long as fewer than a million queue requests come in within a second,
1919             # we can make sure queue ordering is fifo. To support "lvf" (lowest value
1920             # first) recycling, the $date and $seqnum fields are all zero, so the
1921             # ordering is determined entirely by the numeric "value" of identifier
1922             # (really only makes sense for a sequential generator); to achieve the
1923             # numeric sorting in the lexical Btree ordering, we strip off any prefix,
1924             # right-justify the identifier, and zero-pad on the left to create a number
1925             # that is 16 digits wider than the Template mask [yyy kludge that doesn't
1926             # take any overflow into account, or bigints for that matter].
1927             #
1928             # Returns the array of corresponding strings (errors and "id:" strings)
1929             # or an empty array on error.
1930             #
1931             sub queue { my( $noid, $contact, $when, @ids )=@_;
1932              
1933             ! $$noid{"$R/template"} and
1934             addmsg($noid,
1935             "error: queuing makes no sense in a bind-only minter."),
1936             return(());
1937             ! defined($contact) and
1938             addmsg($noid, "error: contact undefined"),
1939             return(());
1940             ! defined($when) || $when !~ /\S/ and
1941             addmsg($noid, "error: queue when? (eg, first, lvf, 30d, now)"),
1942             return(());
1943             # yyy what is sensible thing to do if no ids are present?
1944             scalar(@ids) < 1 and
1945             addmsg($noid, "error: must specify at least one id to queue."),
1946             return(());
1947             my ($seqnum, $delete) = (0, 0, 0);
1948             my ($fixsqn, $qdate); # purposely undefined
1949              
1950             # You can express a delay in days (d) or seconds (s, default).
1951             #
1952             if ($when =~ /^(\d+)([ds]?)$/) { # current time plus a delay
1953             # The number of seconds in one day is 86400.
1954             my $multiplier = (defined($2) && $2 eq "d" ? 86400 : 1);
1955             $qdate = temper(time() + $1 * $multiplier);
1956             }
1957             elsif ($when eq "now") { # a synonym for current time
1958             $qdate = temper(time());
1959             }
1960             elsif ($when eq "first") {
1961             # Lowest value first (lvf) requires $qdate of all zeroes.
1962             # To achieve "first" semantics, we use a $qdate of all
1963             # zeroes (default above), which means this key will be
1964             # selected even earlier than a key that became ripe in the
1965             # queue 85 days ago but wasn't selected because no one
1966             # minted anything in the last 85 days.
1967             #
1968             $seqnum = $$noid{"$R/fseqnum"};
1969             #
1970             # NOTE: fseqnum is reset only when queue is empty; see mint().
1971             # If queue never empties fseqnum will simply keep growing,
1972             # so we effectively truncate on the left to 6 digits with mod
1973             # arithmetic when we convert it to $fixsqn via sprintf().
1974             }
1975             elsif ($when eq "delete") {
1976             $delete = 1;
1977             }
1978             elsif ($when ne "lvf") {
1979             addmsg($noid, "error: unrecognized queue time: $when");
1980             return(());
1981             }
1982              
1983             defined($qdate) and # current time plus optional delay
1984             ($qdate > $$noid{"$R/gseqnum_date"} and
1985             $seqnum = $$noid{"$R/gseqnum"} = SEQNUM_MIN,
1986             $$noid{"$R/gseqnum_date"} = $qdate,
1987             1 or
1988             $seqnum = $$noid{"$R/gseqnum"}),
1989             1 or
1990             $qdate = "00000000000000", # this needs to be 14 zeroes
1991             1;
1992              
1993             my $iderror = "";
1994             $$noid{"$R/genonly"} and
1995             ($iderror = validate($noid, "-", @ids)) !~ /error:/ and
1996             $iderror = "";
1997             $iderror and
1998             addmsg($noid, "error: queue operation not started -- one or "
1999             . "more ids did not validate:\n$iderror"),
2000             return(());
2001             my $firstpart = $$noid{"$R/firstpart"};
2002             my $padwidth = $$noid{"$R/padwidth"};
2003             my $currdate = temper();
2004             my (@retvals, $m, $idval, $paddedid, $circ_svec);
2005             for my $id (@ids) {
2006             exists($$noid{"$id\t$R/h"}) and # if there's a hold
2007             $m = qq@error: a hold has been set for "$id" and @
2008             . "must be released before the identifier can "
2009             . "be queued for minting.",
2010             logmsg($noid, $m),
2011             push(@retvals, $m),
2012             next
2013             ;
2014              
2015             # If there's no circulation record, it means that it was
2016             # queued to get it minted earlier or later than it would
2017             # normally be minted. Log if term is "long".
2018             #
2019             $circ_svec = get_circ_svec($noid, $id);
2020              
2021             $circ_svec =~ /^q/ && ! $delete and
2022             $m = "error: id $id cannot be queued since "
2023             . "it appears to be in the queue already -- "
2024             . "circ record is " . $$noid{"$id\t$R/c"},
2025             logmsg($noid, $m),
2026             push(@retvals, $m),
2027             next
2028             ;
2029             $circ_svec =~ /^u/ && $delete and
2030             $m = "error: id $id has been unqueued already -- "
2031             . "circ record is " . $$noid{"$id\t$R/c"},
2032             logmsg($noid, $m),
2033             push(@retvals, $m),
2034             next
2035             ;
2036             $circ_svec !~ /^q/ && $delete and
2037             $m = "error: id $id cannot be unqueued since its circ "
2038             . "record does not indicate its being queued, "
2039             . "circ record is " . $$noid{"$id\t$R/c"},
2040             logmsg($noid, $m),
2041             push(@retvals, $m),
2042             next
2043             ;
2044             # If we get here and we're deleting, circ_svec must be 'q'.
2045              
2046             $circ_svec eq "" and
2047             ($$noid{"$R/longterm"} && logmsg($noid, "note: "
2048             . "id $id being queued before first "
2049             . "minting (to be pre-cycled)")),
2050             1 or
2051             $circ_svec =~ /^i/ and
2052             ($$noid{"$R/longterm"} && logmsg($noid, "note: "
2053             . "longterm id $id being queued for re-issue"))
2054             ;
2055              
2056             # yyy ignore return OK?
2057             set_circ_rec($noid, $id,
2058             ($delete ? 'u' : 'q') . $circ_svec,
2059             $currdate, $contact);
2060              
2061             ($idval = $id) =~ s/^$firstpart//;
2062             $paddedid = sprintf("%0$padwidth" . "s", $idval);
2063             $fixsqn = sprintf("%06d", $seqnum % SEQNUM_MAX);
2064              
2065             dblock();
2066              
2067             $$noid{"$R/queued"}++;
2068             if ($$noid{"$R/total"} != NOLIMIT # if total is non-zero
2069             && $$noid{"$R/queued"} > $$noid{"$R/oatop"}) {
2070              
2071             dbunlock();
2072              
2073             $m = "error: queue count (" . $$noid{"$R/queued"}
2074             . ") exceeding total possible on id $id. "
2075             . "Queue operation aborted.";
2076             logmsg($noid, $m);
2077             push @retvals, $m;
2078             last;
2079             }
2080             $$noid{"$R/q/$qdate/$fixsqn/$paddedid"} = $id;
2081              
2082             dbunlock();
2083              
2084             $$noid{"$R/longterm"} and
2085             logmsg($noid, "id: "
2086             . $$noid{"$R/q/$qdate/$fixsqn/$paddedid"}
2087             . " added to queue under "
2088             . "$R/q/$qdate/$seqnum/$paddedid");
2089             push @retvals, "id: $id";
2090             $seqnum and # it's zero for "lvf" and "delete"
2091             $seqnum++;
2092             }
2093             dblock();
2094             $when eq "first" and
2095             $$noid{"$R/fseqnum"} = $seqnum,
2096             1 or
2097             $qdate > 0 and
2098             $$noid{"$R/gseqnum"} = $seqnum,
2099             1;
2100             dbunlock();
2101             return(@retvals);
2102             }
2103              
2104             # Generate a sample id for testing purposes.
2105             sub sample{ my( $noid, $num )=@_;
2106              
2107             my $upper;
2108             ! defined($num) and
2109             $upper = $$noid{"$R/total"},
2110             ($upper == NOLIMIT and $upper = 100000),
2111             $num = int(rand($upper));
2112             my $mask = $$noid{"$R/mask"};
2113             my $firstpart = $$noid{"$R/firstpart"};
2114             my $func = ($$noid{"$R/addcheckchar"} ? \&checkchar : \&echo);
2115             return &$func($firstpart . n2xdig($num, $mask));
2116             }
2117              
2118             sub scope { my( $noid )=@_;
2119              
2120             ! $$noid{"$R/template"} and
2121             print("This minter does not generate identifiers, but it\n"
2122             . "does accept user-defined identifier and element "
2123             . "bindings.\n");
2124             my $total = $$noid{"$R/total"};
2125             my $totalstr = human_num($total);
2126             my $naan = $$noid{"$R/naan"} || "";
2127             $naan and
2128             $naan .= "/";
2129              
2130             my ($prefix, $mask, $gen_type) =
2131             ($$noid{"$R/prefix"}, $$noid{"$R/mask"}, $$noid{"$R/generator_type"});
2132              
2133             print "Template ", $$noid{"$R/template"}, " will yield ",
2134             ($total < 0 ? "an unbounded number of" : $totalstr),
2135             " $gen_type unique ids\n";
2136             my $tminus1 = ($total < 0 ? 987654321 : $total - 1);
2137              
2138             # See if we need to compute a check character.
2139             my $func = ($$noid{"$R/addcheckchar"} ? \&checkchar : \&echo);
2140             print
2141             "in the range " . &$func($naan . &n2xdig( 0, $mask)) .
2142             ", " . &$func($naan . &n2xdig( 1, $mask)) .
2143             ", " . &$func($naan . &n2xdig( 2, $mask));
2144             28 < $total - 1 and print
2145             ", ..., " . &$func($naan . &n2xdig(28, $mask));
2146             29 < $total - 1 and print
2147             ", " . &$func($naan . &n2xdig(29, $mask));
2148             print
2149             ", ... up to "
2150             . &$func($naan . &n2xdig($tminus1, $mask))
2151             . ($total < 0 ? " and beyond.\n" : ".\n")
2152             ;
2153             $mask !~ /^r/ and
2154             return 1;
2155             print "A sampling of random values (may already be in use): ";
2156             my $i = 5;
2157             print sample($noid) . " "
2158             while ($i-- > 0);
2159             print "\n";
2160             return 1;
2161             }
2162              
2163             # Return local date/time stamp in TEMPER format. Use supplied time (in seconds)
2164             # if any, or the current time.
2165             #
2166             sub temper { my( $time )=@_;
2167              
2168             my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdat)
2169             = localtime(defined($time) ? $time : time());
2170             $year += 1900; # add the missing the century
2171             $mon++; # zero-based, so increment
2172             return sprintf("%04.4s%02.2s%02.2s%02.2s%02.2s%02.2s",
2173             $year, $mon, $mday, $hour, $min, $sec);
2174             }
2175              
2176             # Check that identifier matches a given template, where "-" means the
2177             # default template for this generator. This is a complete check of all
2178             # characteristics _except_ whether the identifier is stored in the
2179             # database.
2180             #
2181             # Returns an array of strings that are messages corresponding to any ids
2182             # that were passed in. Error strings # that pertain to identifiers
2183             # begin with "iderr: ".
2184             #
2185             sub validate { my( $noid, $template, @ids )=@_;
2186              
2187             my ($first, $prefix, $mask, $gen_type, $msg);
2188             my @retvals;
2189              
2190             ! @ids and
2191             addmsg($noid, "error: must specify a template and at least "
2192             . "one identifier."),
2193             return(());
2194             ! defined($template) and
2195             # If $noid is undefined, the caller looks in errmsg(undef).
2196             addmsg($noid, "error: no template given to validate against."),
2197             return(());
2198              
2199             if ($template eq "-") {
2200             ($prefix, $mask) = ($$noid{"$R/prefix"}, $$noid{"$R/mask"});
2201             # push(@retvals, "template: " . $$noid{"$R/template"});
2202             if (! $$noid{"$R/template"}) { # do blanket validation
2203             my @nonulls = grep(s/^(.)/id: $1/, @ids);
2204             ! @nonulls and
2205             return(());
2206             push(@retvals, @nonulls);
2207             return(@retvals);
2208             }
2209             }
2210             elsif (! parse_template($template, $prefix, $mask, $gen_type, $msg)) {
2211             addmsg($noid, "error: template $template bad: $msg");
2212             return(());
2213             }
2214              
2215             my ($id, @maskchars, $c, $m, $varpart);
2216             my $should_have_checkchar = (($m = $mask) =~ s/k$//);
2217             my $naan = $$noid{"$R/naan"};
2218             ID: for $id (@ids) {
2219             ! defined($id) || $id =~ /^\s*$/ and
2220             push(@retvals,
2221             "iderr: can't validate an empty identifier"),
2222             next;
2223              
2224             # Automatically reject ids starting with "$R/", unless it's an
2225             # "idmap", in which case automatically validate. For an idmap,
2226             # the $id should be of the form $R/idmap/ElementName, with
2227             # element, Idpattern, and value, ReplacementPattern.
2228             #
2229             $id =~ m|^$R/| and
2230             push(@retvals, ($id =~ m|^$R/idmap/.+|
2231             ? "id: $id"
2232             : "iderr: identifiers must not start"
2233             . qq@ with "$R/".@)),
2234             next;
2235              
2236             $first = $naan; # ... if any
2237             $first and
2238             $first .= "/";
2239             $first .= $prefix; # ... if any
2240             ($varpart = $id) !~ s/^$first// and
2241             #yyy ($varpart = $id) !~ s/^$prefix// and
2242             push(@retvals, "iderr: $id should begin with $first."),
2243             next;
2244             # yyy this checkchar algorithm will need an arg when we
2245             # expand into other alphabets
2246             $should_have_checkchar && ! checkchar($id) and
2247             push(@retvals, "iderr: $id has a check character error"),
2248             next;
2249             ## xxx fix so that a length problem is reported before (or
2250             # in addition to) a check char problem
2251              
2252             # yyy needed?
2253             #length($first) + length($mask) - 1 != length($id)
2254             # and push(@retvals,
2255             # "error: $id has should have length "
2256             # . (length($first) + length($mask) - 1)
2257             # and next;
2258              
2259             # Maskchar-by-Idchar checking.
2260             #
2261             @maskchars = split(//, $mask);
2262             shift @maskchars; # toss 'r', 's', or 'z'
2263             for $c (split(//, $varpart)) {
2264             ! defined($m = shift @maskchars) and
2265             push(@retvals, "iderr: $id longer than "
2266             . "specified template ($template)"),
2267             next ID;
2268             $m =~ /e/ && $legalstring !~ /$c/ and
2269             push(@retvals, "iderr: $id char '$c' conflicts"
2270             . " with template ($template)"
2271             . " char '$m' (extended digit)"),
2272             next ID
2273             or
2274             $m =~ /d/ && '0123456789' !~ /$c/ and
2275             push(@retvals, "iderr: $id char '$c' conflicts"
2276             . " with template ($template)"
2277             . " char '$m' (digit)"),
2278             next ID
2279             ; # or $m =~ /k/, in which case skip
2280             }
2281             defined($m = shift @maskchars) and
2282             push(@retvals, "iderr: $id shorter "
2283             . "than specified template ($template)"),
2284             next ID;
2285              
2286             # If we get here, the identifier checks out.
2287             push(@retvals, "id: $id");
2288             }
2289             return(@retvals);
2290             }
2291              
2292             1;
2293              
2294             __END__