File Coverage

blib/lib/Biblio/bp/lib/bp.pl
Criterion Covered Total %
statement 177 321 55.1
branch 71 216 32.8
condition 12 24 50.0
subroutine 10 14 71.4
pod n/a
total 270 575 46.9


line stmt bran cond sub pod time code
1             #
2             # bibliography package for Perl
3             #
4             # The main package.
5             #
6             # The bp package is written by Dana Jacobsen (dana@acm.org).
7             # Copyright 1992-1996 by Dana Jacobsen.
8             #
9             # Permission is given to use and distribute this package without charge.
10             # The author will not be liable for any damage caused by the package, nor
11             # are any warranties implied.
12             #
13             # 26 March 1996
14              
15             package bib;
16              
17             $glb_version = '0.2.97 (19 Dec 96)';
18              
19             # See the file NOTES in the distribution for additional notes.
20              
21             #
22             #
23             # Major functions available for users to call:
24             #
25             #
26             # format();
27             # format($format);
28             # format($input_format, $output_format);
29             #
30             # open($file_name);
31             # open($file_name, $format);
32             #
33             # close();
34             # close($file_name);
35             #
36             # read();
37             # read($file_name);
38             # read($file_name, $format);
39             #
40             # write($file, $output_string);
41             # write($file, $output_string, $format);
42             #
43             # convert($record);
44             #
45             # explode($record);
46             # explode($record, $file_name);
47             #
48             # implode(%record);
49             # implode(%record, $file_name);
50             #
51             # tocanon(%record);
52             # tocanon(%record, $file_name);
53             #
54             # fromcanon(%record);
55             # fromcanon(%record, $file_name);
56             #
57             # clear();
58             # clear($file_name);
59             #
60             # [ file bp-p-option ]
61             #
62             # stdargs(@ARGV)
63             #
64             # options($general_opts, $converter_opts, $infmt_opts, $outfmt_opts);
65             #
66             # doc($what);
67             #
68             #
69             # Functions available primarily for modules to call:
70             #
71             #
72             # parse_format($format_string)
73             #
74             # [ file bp-p-debug ]
75             #
76             # panic($string);
77             #
78             # debugs($statement, $level);
79             # debugs($statement, $level, $module);
80             #
81             # check_consist();
82             #
83             # debug_dump($what_kind);
84             #
85             # ok_print($variable);
86             #
87             # [ file bp-p-errors ]
88             #
89             # errors($warning_level);
90             # errors($warning_level, $error_level);
91             # errors($warning_level, $error_level, $header_string);
92             #
93             # goterror($error_message);
94             # goterror($error_message, $linenum);
95             #
96             # gotwarn($warning_message);
97             # gotwarn($warning_message, $linenum);
98             #
99             # [ file bp-p-dload ]
100             #
101             # load_format($format_name);
102             #
103             # load_charset($charset_name);
104             #
105             # load_converter($converter_name);
106             #
107             # find_bp_files();
108             # find_bp_files($rehash);
109             #
110             # reg_format($long_name, $short_name, $pkg_name, $charset_name, @info);
111             #
112             # [ file bp-p-cs ]
113             #
114             # unicode_to_canon($unicode);
115             #
116             # canon_to_unicode($character);
117             #
118             # decimal_to_unicode($number);
119             #
120             # unicode_to_decimal($unicode);
121             #
122             # unicode_name($unicode);
123             #
124             # meta_name($metacode);
125             #
126             # meta_approx($metacode);
127             #
128             # unicode_approx($unicode);
129             #
130             # nocharset($string);
131             #
132             # [ file bp-p-util ]
133             #
134             # bp_util'mname_to_canon($names_string);
135             # bp_util'mname_to_canon($names_string, $flag_reverse_author);
136             #
137             # bp_util'name_to_canon($name_string);
138             # bp_util'name_to_canon($name_string, $flag_reverse_author);
139             #
140             # bp_util'canon_to_name($name_string);
141             # bp_util'canon_to_name($name_string, $how_formatted);
142             #
143             # bp_util'parsename($name_string);
144             # bp_util'parsename($name_string, $how_formatted);
145             #
146             # bp_util'parsedate($date_string);
147             #
148             # bp_util'canon_month($month_string);
149             #
150             # bp_util'genkey(%canon_record);
151             #
152             # bp_util'regkey($key);
153             #
154             #
155             # Internal functions:
156             #
157             #
158             # close_input($file_name);
159             #
160             # close_output($file_name);
161             #
162             # [ file bp-p-debug ]
163             #
164             # log2($number);
165             #
166             # [ file bp-p-option ]
167             #
168             # parse_num_option($value);
169             #
170             # parse_option($option);
171             #
172             # [ file bp-p-stdbib ]
173             #
174             # open_stdbib($file_name);
175             #
176             # close_stdbib($file_name);
177             #
178             # read_stdbib($file_name);
179             #
180             # write_stdbib($file_name, $output_string);
181             #
182             # clear_stdbib();
183             #
184             # options_stdbib();
185             #
186             # implode_stdbib();
187             #
188             # explode_stdbib();
189             #
190             # tocanon_stdbib();
191             #
192             # fromcanon_stdbib();
193             #
194              
195              
196              
197             # Global variables. Most of these do not need to be initialized, but this
198             # guarantees that they are, and also lists all of them. Anything not here is
199             # an error!
200              
201             #
202             # Here are the functions we expect format modules to have
203             #
204              
205             @glb_expfuncs = ('options', 'open', 'close', 'read', 'write',
206             'explode', 'implode', 'tocanon', 'fromcanon',
207             'clear' );
208             #
209             # The higher the number, the fewer the messages. Generally:
210             # 2 - 10 multiple messages per line
211             # 10 - 100 one message per line
212             # 100 - 1000 one message per record
213             # 1000 - 70000 major routines
214             #
215             # debugging of 2 will print all messages, and additionally will turn on
216             # debug dumping of all globals each time check_consist is called.
217             #
218             # Do not use 0 or 1. They mean false and true, respectively.
219              
220             $glb_debug = 0;
221             $glb_moddebug = 0;
222             $glb_debug = $glb_moddebug = 1 if $^W;
223              
224             #
225             # This is the prefix to use when looking for files. Eventually this will be
226             # "bp/" which means we look in a subdirectory, but for development it's
227             # easier to leave everything at the top level.
228             #
229             $glb_bpprefix = 'bp-';
230              
231             #
232             # informat and outformat are the arguments used to the 'format' subroutine.
233             # they are set only in the format routine, and used to determine the proper
234             # routines to call with 'options', 'open'.
235             #
236             $glb_Iformat = '';
237             $glb_Oformat = '';
238              
239             #
240             # The current format in use. This is set right before calls to module
241             # routines. It is used especially by the stdbib stuff, so we can have one
242             # routine to support multiple formats. It doesn't do anything different
243             # depending on the format, but it would like to know the name.
244             #
245             $glb_current_fmt = undef;
246             $glb_current_cset = undef;
247              
248             #
249             # The current file handle. Since we keep input and output file handles
250             # seperate, it is nice to just have this available, since the calling
251             # function already determines it. It is _not_ available for explode, implode,
252             # tocanon, fromcanon, and clear, as they should not do any I/O to any given
253             # file (indeed, there may be no file at all). Obviously, this is also not
254             # set for open, since its job is to set it!
255             $glb_current_fh = undef;
256              
257             #
258             # rfmt is the real format of a particular file.
259             # rcset is the real character set of a particular file.
260             #
261             # The open routine will set these appropriately, using the information
262             # gleaned in the auto module to set these to the real values it determines,
263             # if the file is opened with 'auto' format.
264             #
265             # Since they are the real formats, they are used when calling 'explode',
266             # 'implode', 'tocanon', 'fromcanon', and 'clear'.
267             #
268             # The auto package only sets these to a format approved by load_format.
269             #
270             # XXXXX clear should call auto'clear which then calls real clear.
271             #
272             %glb_Irfmt = ();
273             %glb_Ircset = ();
274              
275             %glb_Orfmt = ();
276             %glb_Orcset = ();
277              
278             #
279             # This maps the name of a file to the file pointer, so we can happily read
280             # and write files to these names.
281             #
282             %glb_Ifilemap = ();
283             %glb_Ofilemap = ();
284              
285             #
286             # The current file names.
287             # An open, read, or write will set it, and a close will undefine it.
288             # These are initialized to STDIN and STDOUT.
289             #
290             #$glb_Ifilename = '-';
291             #$glb_Ofilename = '>>-';
292             # XXXXX initialize to undef, because we don't know anything yet.
293             $glb_Ifilename = undef;
294             $glb_Ofilename = undef;
295              
296             #
297             # The map of input files and record numbers.
298             # XXXXX how about an output one also?
299             # XXXXX It would be nice to be able to return this value to the user
300             #
301             %glb_filelocmap = ();
302              
303             #
304             # A more verbose location. This is set by packages, which need to keep
305             # track of their own information, and just set this when they need to.
306             # the error routines will print this out.
307             # It will get undef'd any time filename is changed (open, read, close).
308             #
309             # This is particularly useful for pointing to the first line of a record.
310             # See bp-refer.pl for an example of how to use this.
311             #
312             $glb_vloc = undef;
313              
314             #
315             # This is the variable used to map functions. The key is the name of the
316             # format, a comma, then the name of the function. For example,
317             # $formats{'bibtex', "write"}
318             # would be set to the name of the routine that closes BibTeX files.
319             #
320             %formats = ();
321              
322             #
323             # This variable holds information about special converters, which will get
324             # loaded and invoked when the 'convert' call is used. The first time we
325             # try conversion with glb_cvtname defined, we try to load it. If we can't
326             # find it, we undef glb_cvtname, which means we bypass the check from now
327             # on (at least until we call format again).
328             #
329             %special_converters = ();
330              
331             #
332             # This handles the name we use to find a converter. It's set in 'format'.
333             #
334             $glb_cvtname = undef;
335              
336             #
337             # This is the list of character sets.
338             #
339             %charsets = ();
340              
341             #
342             # We use this variable to perform indirect function calls.
343             # It can _never_ be counted on to contain anything or to retain its value.
344             #
345             $func = '';
346              
347             #
348             # The error global variables
349             #
350             # The default error settings are to die immediately on an error,
351             # and print all warnings immediately. Setting the level of
352             # errors to delay or ignore could cause a lot of headaches.
353             #
354             $glb_error_level = 3;
355             $glb_warn_level = 2;
356             # If this is set, we also store the error/warn location in the delay string.
357             $glb_error_saveline = 0;
358             # Variables we keep track of totals in. &errors('clear') will flush these.
359             $glb_num_errors = 0;
360             $glb_num_warns = 0;
361             $glb_str_errors = undef;
362             $glb_str_warns = undef;
363              
364             #
365             # The supported formats and character sets.
366             # It is used by find_bp_files to cache the information, so always call that
367             # function -- never use this variable!
368             #
369             $glb_supported_files = undef;
370              
371             #
372             # Character set stuff. We define an escape character, then some definitions
373             # that use this escape character. Sort of a markup language.
374             #
375             # XXXXX Disadvantages of using non-printable escape character:
376             # You can't type it into most editors.
377             # It looks funny if you try to view it.
378             # Advantages:
379             # It saves time by not having to escape user characters very often.
380             # We won't get into regex trouble by using it (cf. "$")
381             #
382             $cs_escape = "\034";
383             $cs_char_escape = $cs_escape . 'e';
384             $cs_sep = $cs_escape . '/';
385             $cs_sep2 = $cs_escape . ',';
386             $cs_temp = $cs_escape . 't'; # temporary character not ever in text
387             $cs_ext = $cs_escape . 'n'; # (followed by the Unicode entry in hex)
388             # (e.g. en00A5 --> Yen sign)
389             $cs_meta = $cs_escape . 'm'; # (followed by our meta table entry in hex)
390             # (e.g. em00A4 --> font change italics)
391              
392             # That's all the cs stuff. We use 8bit characters internally.
393              
394              
395             ##################
396             #
397             # Options
398             #
399             ##################
400              
401             # 0: don't ever call cs routines, 1: call them
402             $opt_CSConvert = 1;
403              
404             # 0: don't protect output characters, 1: protect. e.g. in TeX '#' -> '\#'
405             $opt_CSProtect = 1;
406              
407             # XXXXX We use a search string given by each charset. This should handle
408             # even the case where a format has a different 7 bit mapping.
409              
410             # The default debugging for perl -w, or 'debugging=on'
411             $opt_default_debug_level = 8000;
412              
413              
414              
415             require "${glb_bpprefix}p-debug.pl";
416             # loads:
417             # bib'assert
418             # bib'panic
419             # bib'debugs
420             # bib'check_consist
421             # bib'debug_dump
422             # bib'okprint
423              
424             require "${glb_bpprefix}p-errors.pl";
425             # loads:
426             # bib'errors
427             # bib'goterror
428             # bib'gotwarn
429              
430              
431             ######
432             #
433             # formats are in the form: "format:cset"
434             #
435             # There is a possibility that this will change, but probably just to add
436             # versions. The format routine and the auto module make these strings.
437             # A few of the routines now just use split(/:/, $format) instead of calling
438             # this routine since it's much faster.
439             #
440             sub parse_format {
441 558     558   971 local($pformat) = @_;
442              
443 558 50       1091 &panic("parse_format called with no arguments") unless defined $pformat;
444              
445 558         1650 &debugs("parse_format: $pformat", 32);
446              
447 558         2280 split(/:/, $pformat);
448             }
449              
450              
451             ######
452              
453             sub format {
454 144     144   311 local($ifmt, $ofmt) = @_;
455 144         260 local($success) = 0;
456 144         208 local($fmti, $fmto, $cset);
457              
458             # if called with no arguments, return our formats.
459 144 50 66     341 if ( (!defined $ifmt) && (!defined $ofmt) ) {
460 3         15 return ($glb_Iformat, $glb_Oformat);
461             }
462              
463 141 100       469 $ofmt = $ifmt unless defined $ofmt;
464              
465             # XXXXX make sure this is ok. What is Iformat is set to 'bibtex:troff' and
466             # I then call format with 'bibtex' as my format? Presumably I would
467             # want Iformat set to 'bibtex:tex' now, yes?
468              
469 141 100 100     712 if ( ($glb_Iformat =~ /^$ifmt:/) && ($glb_Oformat =~ /^$ofmt:/) ) {
470 2         10 return 1;
471             }
472              
473 139         591 &debugs("format ( $ifmt -> $ofmt )", 32768);
474              
475 139         378 $glb_cvtname = undef; # we don't want to call some strange converter!
476              
477             # special: if ifmt or ofmt is a null string, then we want to leave
478             # the current setting alone!
479              
480 139 50       571 if ($ifmt eq '') {
    50          
481 0         0 $success++;
482 0         0 ($fmti, $cset) = &parse_format($glb_Iformat);
483             } elsif ( ($fmti, $cset) = &load_format($ifmt) ) {
484             # XXXXX should implement this, or at least hook into it
485 139 50 66     392 return &goterror("auto charset recognition is unimplemented")
486             if ( ($cset eq 'auto') && ($fmti ne 'auto') );
487 139         145 $success++;
488 139         279 $glb_Iformat = "$fmti:$cset";
489             # XXXXX Should we open stdin here?
490             }
491              
492 139 50       689 if ($ofmt eq '') {
    50          
493 0         0 $success++;
494 0         0 ($fmto, $cset) = &parse_format($glb_Oformat);
495             } elsif ( ($fmto, $cset) = &load_format($ofmt) ) {
496 139 50 66     389 return &goterror("auto charset recognition is unimplemented")
497             if ( ($cset eq 'auto') && ($fmto ne 'auto') );
498 139         136 $success++;
499 139         231 $glb_Oformat = "$fmto:$cset";
500             # XXXXX open STDOUT to our format. Right? This is a re-open.
501             # 17 Nov 95, changed to >>- from >-.
502 139 100       419 &open('>>-') if $fmto ne "auto";
503             }
504              
505             # If we have a second format and we're successful, then set converter name.
506             # The name looks like "ref2btx", or "ins2mrc". This will be set to undef
507             # if we don't have a safely loaded converter, or the name of it if we do.
508             # We also don't have special converters between the same format.
509 139 100 66     709 if ( ($success == 2) && ($fmti ne $fmto) ) {
510 5         35 $glb_cvtname = &load_converter( $formats{$fmti, 'i_sname'} . '2'
511             . $formats{$fmto, 'i_sname'});
512             }
513              
514 139         346 &check_consist;
515              
516 139         583 ($success == 2);
517             }
518              
519             ######
520              
521             require "${glb_bpprefix}p-dload.pl";
522             # loads:
523             # bib'load_format
524             # bib'load_charset
525             # bib'find_bp_files
526             # bib'reg_format
527              
528             ######
529              
530             require "${glb_bpprefix}p-cs.pl";
531             # loads:
532             # variables used by the cs routines
533             # bib'nocharset
534             # bib'unicode_to_canon
535              
536             ######
537              
538             require "${glb_bpprefix}p-option.pl";
539             # loads:
540             # bib'stdargs
541             # bib'options
542             # bib'parse_num_option
543             # bib'parse_option
544             # bib'doc
545              
546             ###### open("file" [,"format"] );
547              
548             # Much like the normal open call, we use "foo" to open foo for read, ">foo"
549             # to open for write, and ">>foo" for append.
550             #
551             # Note that because of the way Perl filehandles are transferred, I can't tell
552             # the difference between STDOUT and 'STDOUT', so you must always use '-' for
553             # STDIN and '>-' (or '>>-') for STDOUT.
554             #
555             # XXXXX You can get the routines confused by giving them 'foo' and './foo'
556             # which point to the same file of course, but they have different names.
557              
558             sub open {
559 139     139   251 local($file, $format) = @_;
560 139         181 local($name, $mode);
561 139         169 local($fmt, $cset);
562              
563 139 50       296 &panic("open called with no arguments") unless defined $file;
564              
565             #&check_consist;
566              
567 139 100       503 if ($file =~ /^>>(.*)/) {
    50          
568 137         167 $mode = 'append'; $name = $1;
  137         282  
569             } elsif ($file =~ /^>(.*)/) {
570 0         0 $mode = 'write'; $name = $1;
  0         0  
571             } else {
572 2         4 $mode = 'read'; $name = $file;
  2         5  
573             }
574             # XXXXX for now, warn them about this.
575 139 50       326 &gotwarn("Using STDIN ${mode}s to the file 'STDIN'") if $name eq 'STDIN';
576 139 50       245 &gotwarn("Using STDOUT ${mode}s to the file 'STDOUT'") if $name eq 'STDOUT';
577 139 50       273 &gotwarn("Using STDERR ${mode}s to the file 'STDERR'") if $name eq 'STDERR';
578              
579             # We allow '-' to be read and written to at the same time. No others.
580             # XXXXX For now, files cannot be re-opened without an explicit close.
581 139 100       257 if ($name ne '-') {
582 2 50       7 if ($mode eq 'read') {
583 2 50       9 return &goterror("file $name is already opened for write")
584             if defined $glb_Orfmt{$name};
585 2 50       7 return &goterror("re-opening file $name") if defined $glb_Irfmt{$name};
586             } else {
587 0 0       0 return &goterror("file $name is already opened for read")
588             if defined $glb_Irfmt{$name};
589 0 0       0 return &goterror("re-opening file $name") if defined $glb_Orfmt{$name};
590             }
591             }
592              
593 139         180 $glb_vloc = undef;
594              
595 139 50       207 if (defined $format) {
596 0 0       0 return undef unless ($fmt, $cset) = &load_format($format);
597             } else {
598 139 100       210 if ($mode eq 'read') {
599 2         6 ($fmt, $cset) = &parse_format($glb_Iformat);
600             } else {
601 137         240 ($fmt, $cset) = &parse_format($glb_Oformat);
602             }
603             }
604              
605 139 100       305 if ($mode eq 'read') {
606 2         5 $glb_Ifilename = $name;
607 2         5 $glb_filelocmap{$name} = 0;
608 2         5 $glb_current_fh = "bib'GFMI" . $name;
609             # no strict 'subs';
610 2 50       7 $glb_current_fh = STDIN if $name eq '-'; # magic filehandle
611             } else {
612 137         210 $glb_Ofilename = $name;
613 137         197 $glb_current_fh = "bib'GFMO" . $name;
614             # no strict 'subs';
615 137 50       323 $glb_current_fh = STDOUT if $name eq '-'; # magic filehandle
616             }
617              
618 139         531 &debugs("opening $name<$fmt:$cset> for $mode", 4096);
619              
620 139         195 $glb_current_fmt = $fmt;
621 139         186 $glb_current_cset = $cset;
622 139         306 $func = $formats{$fmt, "open"};
623 139         570 $fmt = &$func($file); # pass the original argument, including the mode
624              
625 139 50       280 if (defined $fmt) {
626             # if we don't know our cset, then open should have returned it
627 139 100       249 if ($cset eq 'auto') {
628 2         9 ($fmt, $cset) = &parse_format($fmt);
629             } else {
630 137         251 ($fmt) = &parse_format($fmt);
631             }
632              
633 139 100       305 if ($mode eq 'read') {
634 2         7 $glb_Irfmt{$name} = $fmt;
635 2         5 $glb_Ircset{$name} = $cset;
636 2         12 $glb_Ifilemap{$name} = $glb_current_fh;
637             } else {
638 137         233 $glb_Orfmt{$name} = $fmt;
639 137         384 $glb_Orcset{$name} = $cset;
640 137         186 $glb_Ofilemap{$name} = $glb_current_fh;
641             }
642              
643 139         496 &debugs("opened $name<$fmt:$cset> for $mode", 1024);
644             } else {
645 0         0 &debugs("unable to open $name with format $format for $mode", 1024);
646             # XXXXX Assume that the module gave its own error message for failure
647             }
648              
649 139         294 &check_consist;
650              
651 139 50       272 if (wantarray) {
652 0         0 ($fmt, $cset);
653             } else {
654 139         355 $fmt;
655             }
656             }
657              
658              
659             ###### close( ["file"] );
660              
661             # XXXXX Since we don't currently allow files to be opened for read and write
662             # simultaneously we know which maps to use. If we allow this sometime,
663             # then we'll be in trouble.
664             #
665             # XXXXX We DO allow '-' to be opened for read and write.
666             # 1) the default map is input, unless the file starts with '>'.
667             # 2) if a file is not found in the default map, then the other is tried.
668             #
669             # This means that '>-' must be given to close STDOUT -- if not, you will
670             # end up closing STDIN. The other routines will _not_ strip off a '>'.
671             #
672             # With no arguments, we close the last INPUT file accessed.
673              
674             sub close {
675 2     2   8 local($file) = @_;
676 2         5 local($result);
677              
678             #&check_consist;
679              
680             # a close with no arguments closes the last file read from.
681 2 50       9 $file = $glb_Ifilename unless defined $file;
682             # check for file undefined?
683              
684             # XXXXX should we allow an optional extra > here (for append)?
685 2 50       11 if ($file =~ /^>(.*)/) {
686 0         0 $result = &close_output($1);
687             } else {
688             # try input
689 2         20 $result = &close_input($file);
690             # try output if that failed.
691 2 50       9 $result = &close_output($file) unless defined $result;
692             }
693 2 50       9 return &goterror("Closing unopened file $file") unless defined $result;
694              
695 2         8 &check_consist;
696              
697 2         8 $result;
698             }
699              
700             sub close_input {
701 2     2   7 local($name) = @_;
702 2         4 local($result);
703              
704 2 50       9 return undef unless defined $glb_Irfmt{$name};
705              
706 2         12 &debugs("closing input file $name", 512);
707              
708 2         5 $glb_current_fmt = $glb_Irfmt{$name};
709 2         6 $glb_current_cset = $glb_Ircset{$name};
710 2         5 $glb_current_fh = $glb_Ifilemap{$name};
711 2         9 $func = $formats{$glb_Irfmt{$name}, "close"};
712 2         11 $result = &$func($name);
713              
714 2         9 delete $glb_Irfmt{$name};
715 2         8 delete $glb_Ircset{$name};
716 2         4 delete $glb_Ifilemap{$name};
717 2         6 delete $glb_filelocmap{$file};
718 2 50       10 if ($name eq $glb_Ifilename) {
719 2         5 $glb_Ifilename = undef;
720 2         5 $glb_vloc = undef;
721             }
722 2         8 $result;
723             }
724              
725             sub close_output {
726 0     0   0 local($name) = @_;
727 0         0 local($result);
728              
729 0 0       0 return undef unless defined $glb_Orfmt{$name};
730              
731 0         0 &debugs("closing otuput file $name", 512);
732              
733 0         0 $glb_current_fmt = $glb_Orfmt{$name};
734 0         0 $glb_current_cset = $glb_Orcset{$name};
735 0         0 $glb_current_fh = $glb_Ofilemap{$name};
736 0         0 $func = $formats{$glb_Orfmt{$name}, "close"};
737 0         0 $result = &$func($name);
738 0         0 delete $glb_Orfmt{$name};
739 0         0 delete $glb_Orcset{$name};
740 0         0 delete $glb_Ofilemap{$name};
741 0 0       0 if ($name eq $glb_Ofilename) {
742 0         0 $glb_Ofilename = undef;
743 0         0 $glb_vloc = undef;
744             }
745 0         0 $result;
746             }
747              
748              
749             ###### read( ["file", "format"] );
750              
751             sub read {
752 134     134   228 local($file, $format) = @_;
753 134         184 local($fmt, $cset);
754              
755             # a read with no arguments reads from the last file read from or opened.
756 134 50       299 $file = $glb_Ifilename unless defined $file;
757              
758 134 50       321 if (!defined $glb_Irfmt{$file}) {
759 0         0 return &goterror("Reading from unopened file $file");
760             }
761              
762 134         160 $glb_Ifilename = $file;
763 134         152 $glb_vloc = undef;
764              
765 134 50 33     342 if ( (defined $format) && ($format ne $glb_Irfmt{$file}) ) {
766 0 0       0 return undef unless ($fmt, $cset) = &load_format($format);
767 0         0 &gotwarn("File '$file' read as '$format'.");
768             } else {
769 134         188 $fmt = $glb_Irfmt{$file};
770 134         237 $cset = $glb_Ircset{$file};
771             }
772              
773             #&debugs("reading $file<$fmt>", 32);
774              
775 134         188 $glb_filelocmap{$file}++;
776              
777 134         167 $glb_current_fmt = $fmt;
778 134         157 $glb_current_cset = $cset;
779 134         207 $glb_current_fh = $glb_Ifilemap{$file};
780 134         316 $func = $formats{$fmt, "read"};
781 134         444 &$func($file);
782             }
783              
784             ###### write( "file", "output-string", ["format"] );
785             #
786             # XXXXX We should have a way of writing to a string. Probably with file
787             # undefined, but output-string defined. What format will we be writing?
788             # we use the file to determine our format....
789              
790             sub write {
791 0     0   0 local($file, $out, $format) = @_;
792 0         0 local($fmt, $cset);
793              
794 0 0       0 &panic("write called with no arguments") unless defined $file;
795 0 0       0 &panic("write called with no output") unless defined $out;
796              
797 0 0       0 if (!defined $glb_Orfmt{$file}) {
798 0         0 return &goterror("Writing to unopened file $file");
799             }
800              
801 0 0 0     0 if ( (defined $format) && ($format ne $glb_Orfmt{$file}) ) {
802 0 0       0 return undef unless ($fmt, $cset) = &load_format($format);
803 0         0 &gotwarn("File '$file' written as '$format'.");
804             } else {
805 0         0 $fmt = $glb_Orfmt{$file};
806 0         0 $cset = $glb_Orcset{$file};
807             }
808              
809 0         0 $glb_current_fmt = $fmt;
810 0         0 $glb_current_cset = $cset;
811 0         0 $glb_current_fh = $glb_Ofilemap{$file};
812 0         0 $func = $formats{$fmt, "write"};
813 0         0 &$func($file, $out);
814             }
815              
816             ######
817             #
818             # convert is called when you want to convert between the informat and the
819             # outformat. It's always a good idea to use convert instead of doing the
820             # calls to explode->tocanon->fromcanon->implode yourself. Two reasons --
821             # first, if the in and out formats are the same, convert will just return,
822             # which will save you a lot of trouble. Second, it searches for special
823             # converters that can be set up to handle conversions from one type to
824             # another directly. This can not only be a lot faster, but can give you
825             # better results.
826             #
827             # XXXXX Test conversion or 'auto:tex' to 'auto:troff'.
828             # This should convert between any format, but with TeX characters, to
829             # the same format, but with troff characters.
830             #
831             ###### convert( "recin" );
832             #
833             sub convert {
834 0     0   0 local($recin) = @_;
835 0         0 local($ifmt, $icset);
836 0         0 local($ofmt, $ocset);
837              
838 0 0       0 &panic("convert called with no arguments") unless defined $recin;
839              
840             # XXXXX Should we be converting between Iformat and Oformat, or last file
841             # in and lastfile out?
842              
843             # $ifmt = $glb_Irfmt{$glb_Ifilename};
844             # $icset = $glb_Ircset{$glb_Ifilename};
845              
846             # $ofmt = $glb_Orfmt{$glb_Ofilename};
847             # $ocset = $glb_Orcset{$glb_Ofilename};
848              
849             # ($ifmt, $icset) = &parse_format($glb_Iformat) unless defined $ifmt;
850             # ($ofmt, $ocset) = &parse_format($glb_Oformat) unless defined $ofmt;
851              
852             # XXXXX We now use the format specs, and back off to the most recent file
853             # only if we're auto formatting.
854              
855 0         0 ($ifmt, $icset) = split(/:/, $glb_Iformat);
856 0         0 ($ofmt, $ocset) = split(/:/, $glb_Oformat);
857              
858 0 0       0 &debugs("conv1 <$ifmt:$icset> to <$ofmt:$ocset>", 512) if $glb_debug;
859 0 0       0 if ($ifmt eq 'auto') {
860 0 0       0 return &goterror("Convert has no input format") unless defined $glb_Ifilename;
861 0         0 $ifmt = $glb_Irfmt{$glb_Ifilename};
862             }
863 0 0       0 if ($icset eq 'auto') {
864 0 0       0 return &goterror("Convert has no input charset") unless defined $glb_Ifilename;
865 0         0 $icset = $glb_Ircset{$glb_Ifilename};
866             }
867 0 0       0 if ($ofmt eq 'auto') {
868 0 0       0 if (defined $glb_Ofilename) {
869 0         0 $ofmt = $glb_Orfmt{$glb_Ofilename};
870             } else {
871 0         0 $ofmt = $ifmt;
872             }
873             }
874 0 0       0 if ($ocset eq 'auto') {
875 0 0       0 if (defined $glb_Ofilename) {
876 0         0 $ocset = $glb_Orcset{$glb_Ofilename};
877             } else {
878 0         0 $ocset = $icset;
879             }
880             }
881 0 0       0 &debugs("conv2 <$ifmt:$icset> to <$ofmt:$ocset>", 512) if $glb_debug;
882            
883 0 0       0 if ($ifmt eq $ofmt) {
884             # same format, same character set
885 0 0       0 return $recin if ($icset eq $ocset);
886             # same format, diff charset, but they don't want conversion.
887 0 0       0 return $recin unless $opt_CSConvert;
888              
889             # same format, different character set. Go to canon, then from canon.
890             # XXXXX XXF use the protection test here.
891 0         0 $recin =~ s/$bib'cs_escape/$bib'cs_char_escape/go;
892 0         0 local($reccan, $recout);
893 0         0 $func = $charsets{$icset, 'tocanon'};
894 0         0 $reccan = &$func($recin, $opt_CSProtect);
895 0         0 $func = $charsets{$ocset, 'fromcanon'};
896 0         0 $recout = &$func($reccan, $opt_CSProtect);
897 0         0 $recout =~ s/$bib'cs_char_escape/$bib'cs_escape/go;
898 0         0 return $recout;
899             }
900              
901             # Different formats. First check for a special converter. cvtname is
902             # safely defined in format.
903             # XXXXX charsets with special converters? Probably not.
904             # Note that a special converter means no charset mapping. The converter
905             # is expected to do that.
906              
907 0 0       0 if (defined $glb_cvtname) {
908 0         0 &debugs("calling converter '$glb_cvtname'", 128);
909 0         0 $func = $special_converters{$glb_cvtname, 'convert'};
910 0         0 return &$func($recin);
911             }
912              
913             #if we don't have a special converter, we do it the hard way.
914 0 0       0 &debugs("convert through canon", 128) if $glb_debug;
915              
916             # By the way, unrolling all four of these functions saves only about
917             # 2 seconds off of a 53 second run (1043 records). Really not worth it.
918 0         0 &implode(&fromcanon(&tocanon(&explode($recin))));
919              
920             }
921              
922             ######
923             #
924             # explode ( $input_record )
925             #
926             # explode ( $input_record , $file_name )
927             #
928             # Explode a record from it's textual form into an assosiative array which
929             # is returned. In the second form, the input file name determines the
930             # format to use instead of the current default.
931             #
932             ######
933              
934             sub explode {
935 132     132   292 local($recin, $file) = @_;
936 132         174 local($fmt, $cset);
937              
938 132 50       285 return undef unless defined $recin;
939              
940 132 50       205 if (defined $file) {
941 0 0       0 if (defined $glb_Irfmt{$file}) {
    0          
942 0         0 $fmt = $glb_Irfmt{$file};
943 0         0 $cset = $glb_Ircset{$file};
944             } elsif (defined $glb_Orfmt{$file}) {
945 0         0 $fmt = $glb_Orfmt{$file};
946 0         0 $cset = $glb_Orcset{$file};
947             } else {
948 0         0 return &goterror("unopened file $file given to explode");
949             }
950 0 0       0 &debugs("explode $file<$fmt:$cset>", 32) if $glb_debug;
951             } else {
952 132 50       214 if (defined $glb_Ifilename) {
953 132         239 $fmt = $glb_Irfmt{$glb_Ifilename};
954 132         254 $cset = $glb_Ircset{$glb_Ifilename};
955             } else {
956 0         0 ($fmt, $cset) = split(/:/, $glb_Iformat);
957             }
958 132 50       279 &debugs("explode <$fmt:$cset>", 32) if $glb_debug;
959             }
960              
961             # Records in exploded format need to be able to use our seperator
962             # character and the like. So we protect our escape character here.
963 132         315 $recin =~ s/$bib'cs_escape/$bib'cs_char_escape/go;
964              
965 132         158 $glb_current_fmt = $fmt;
966 132         157 $glb_current_cset = $cset;
967 132         309 $func = $formats{$fmt, 'explode'};
968 132         543 &$func($recin);
969             }
970              
971             ######
972             #
973             # implode ( %output_record )
974             #
975             # implode ( %output_record, $file_name )
976             #
977             # Implode a record from it's associative array into it's textual form, which
978             # is returned. In the second form, the input file name determines the
979             # format to use instead of the current default.
980             #
981             ######
982              
983             sub implode {
984 0     0   0 local(%recout, $file) = @_;
985 0         0 local($fmt, $cset, $recout);
986              
987 0 0       0 if (defined $file) {
988 0 0       0 if (defined $glb_Orfmt{$file}) {
    0          
989 0         0 $fmt = $glb_Orfmt{$file};
990 0         0 $cset = $glb_Orcset{$file};
991             } elsif (defined $glb_Irfmt{$file}) {
992 0         0 $fmt = $glb_Irfmt{$file};
993 0         0 $cset = $glb_Ircset{$file};
994             } else {
995 0         0 return &goterror("unopened file $file given to implode");
996             }
997 0 0       0 &debugs("implode $file<$fmt:$cset>", 32) if $glb_debug;
998             } else {
999 0 0       0 if (defined $glb_Ofilename) {
1000 0         0 $fmt = $glb_Orfmt{$glb_Ofilename};
1001 0         0 $cset = $glb_Orcset{$glb_Ofilename};
1002             } else {
1003 0         0 ($fmt, $cset) = split(/:/, $glb_Oformat);
1004             }
1005 0 0       0 &debugs("implode <$fmt:$cset>", 32) if $glb_debug;
1006             }
1007              
1008 0         0 $glb_current_fmt = $fmt;
1009 0         0 $glb_current_cset = $cset;
1010 0         0 $func = $formats{$fmt, 'implode'};
1011 0         0 $recout = &$func(%recout);
1012              
1013             # We need to unprotect our escape character now. But leave canon fmt alone.
1014 0 0 0     0 if ( ($recout =~ /$bib'cs_escape/o) && ($fmt ne 'canon') ) {
1015 0 0       0 $recout =~ s/$bib'cs_sep/\//go && &gotwarn("Seperator1 left in $recout");
1016 0 0       0 $recout =~ s/$bib'cs_sep2/\//go && &gotwarn("Seperator2 left in $recout");
1017 0 0       0 $recout =~ s/$bib'cs_escape[^e]//g && &gotwarn("Unknown escape found in $recout");
1018 0         0 $recout =~ s/$bib'cs_char_escape/$bib'cs_escape/go;
1019             }
1020 0         0 $recout;
1021             }
1022              
1023             ###### tocanon( "%recexp" [, "file"] );
1024              
1025             sub tocanon {
1026 132     132   1246 local(%recexp, $file) = @_;
1027 132         351 local($fmt, $cset);
1028 132         188 local($field, $val);
1029              
1030 132 50       219 if (defined $file) {
1031 0         0 $fmt = $glb_Irfmt{$file};
1032 0         0 $cset = $glb_Ircset{$file};
1033 0 0       0 if (!defined $fmt) {
1034 0         0 $fmt = $glb_Orfmt{$file};
1035 0         0 $cset = $glb_Orcset{$file};
1036             }
1037 0 0       0 return &goterror("unopened file $file given to tocanon") unless defined $fmt;
1038 0 0       0 &debugs("tocanon $file<$fmt:$cset>", 32) if $glb_debug;
1039             } else {
1040 132 50       266 if (defined $glb_Ifilename) {
1041 132         292 $fmt = $glb_Irfmt{$glb_Ifilename};
1042 132         285 $cset = $glb_Ircset{$glb_Ifilename};
1043             }
1044             #if ( (!defined $fmt) && (defined $glb_Ofilename) ) {
1045             # $fmt = $glb_Orfmt{$glb_Ofilename};
1046             # $cset = $glb_Orcset{$glb_Ofilename};
1047             #}
1048 132 50       289 ($fmt, $cset) = split(/:/, $glb_Iformat) unless defined $fmt;
1049 132 50       291 &debugs("tocanon <$fmt:$cset>", 32) if $glb_debug;
1050             }
1051              
1052             # First, do character set conversion
1053             # XXXXX if we're protecting output, does that _always_ mean unprotect input?
1054              
1055 132 50       225 if ($opt_CSConvert) {
1056 132         358 $func = $charsets{$cset, 'tocanon'};
1057 132 50       351 if ( defined $charsets{$cset, 'toesc'} ) {
1058 132         268 local($teststr) = $charsets{$cset, 'toesc'};
1059             # XXXXX We may get a speedup or a slowdown (depending on the input)
1060             # by putting this loop inside a test:
1061             # if (join("", values %recexp) =~ /$teststr/)
1062             # which if no match is found is ~3 times faster than the loop.
1063             # Of course if a match _is_ found, it's wasted time.
1064             # XXXXX Another idea is to put this all inside an eval. This would
1065             # expand $teststr for us as well as func, and look a lot cleaner.
1066             # Also, hooks would be more efficient. But... profiling tests
1067             # have the eval idea almost twice as slow as this. Oh well.
1068 132         479 while (($field, $val) = each %recexp) {
1069 1996 100       7315 next unless $val =~ /$teststr/;
1070 84         435 $recexp{$field} = &$func($val, $opt_CSProtect);
1071             }
1072             } else {
1073 0         0 while (($field, $val) = each %recexp) {
1074 0         0 $recexp{$field} = &$func($val, $opt_CSProtect);
1075             }
1076             }
1077             }
1078              
1079             # Next, do the format conversion
1080              
1081 132         173 $glb_current_fmt = $fmt;
1082 132         171 $glb_current_cset = $cset;
1083 132         286 $func = $formats{$fmt, 'tocanon'};
1084 132         796 &$func(%recexp);
1085             }
1086              
1087             ###### fromcanon( "%reccan" [, "file"] );
1088              
1089             sub fromcanon {
1090 132     132   1404 local(%record, $file) = @_;
1091 132         426 local(%recexp);
1092 132         167 local($fmt, $cset);
1093 132         161 local($field, $val);
1094              
1095 132 50       242 if (defined $file) {
1096 0         0 $fmt = $glb_Orfmt{$file};
1097 0         0 $cset = $glb_Orcset{$file};
1098 0 0       0 if (!defined $fmt) {
1099 0         0 $fmt = $glb_Irfmt{$file};
1100 0         0 $cset = $glb_Ircset{$file};
1101             }
1102 0 0       0 return &goterror("unopened file $file given to fromcanon") unless defined $fmt;
1103 0 0       0 &debugs("fromcanon $file<$fmt:$cset>", 32) if $glb_debug;
1104             } else {
1105 132 50       312 if (defined $glb_Ofilename) {
1106 132         255 $fmt = $glb_Orfmt{$glb_Ofilename};
1107 132         218 $cset = $glb_Orcset{$glb_Ofilename};
1108             }
1109 132 50       252 ($fmt, $cset) = split(/:/, $glb_Oformat) unless defined $fmt;
1110 132 50       248 &debugs("fromcanon <$fmt:$cset>", 32) if $glb_debug;
1111             }
1112              
1113             # First, the format conversion
1114              
1115 132         148 $glb_current_fmt = $fmt;
1116 132         149 $glb_current_cset = $cset;
1117 132         297 $func = $formats{$fmt, "fromcanon"};
1118 132         856 %recexp = &$func(%record);
1119              
1120             # Next, the character set conversion
1121              
1122 132 50       590 if ($opt_CSConvert) {
1123 132         308 $func = $charsets{$cset, 'fromcanon'};
1124             # Pick which loop we'll run
1125 132 50       295 if ( defined $charsets{$cset, 'fromesc'} ) {
1126             # fast loop -- check to see if we have any specials before calling.
1127 132         335 local($teststr) = $charsets{$cset, 'fromesc'};
1128 132         458 while (($field, $val) = each %recexp) {
1129 2128 100       7354 next unless $val =~ /$teststr/;
1130 6         39 $recexp{$field} = &$func($val, $opt_CSProtect);
1131             }
1132             } else {
1133             # call the conversion routine for each field.
1134 0         0 while (($field, $val) = each %recexp) {
1135 0         0 $recexp{$field} = &$func($val, $opt_CSProtect);
1136             }
1137             }
1138             }
1139              
1140 132         2554 %recexp;
1141             }
1142              
1143             ######
1144              
1145             sub clear {
1146 2     2   16 local($file) = @_;
1147 2         4 local($fmt);
1148              
1149 2 50       7 if (!defined $file) {
1150 2         12 &errors('clear');
1151 2         3 return 1;
1152             }
1153 0 0         if ($file =~ /^>(.*)/) {
1154 0           $fmt = $glb_Orfmt{$1};
1155             } else {
1156 0           $fmt = $glb_Irfmt{$file};
1157 0 0         $fmt = $glb_Orfmt{$file} unless defined $fmt;
1158             }
1159 0 0         return &goterror("clearing unopened file $file") unless defined $fmt;
1160 0           $func = $formats{$fmt, "clear"};
1161 0           &$func($file);
1162             }
1163              
1164              
1165             require "${glb_bpprefix}p-stdbib.pl";
1166             # loads:
1167             # bib'open_stdbib
1168             # bib'close_stdbib
1169             # bib'read_stdbib
1170             # bib'write_stdbib
1171             # bib'clear_stdbib
1172             # bib'implode_stdbib
1173             # bib'explode_stdbib
1174             # bib'tocanon_stdbib
1175             # bib'fromcanon_stdbib
1176              
1177             ######
1178             #
1179             # Load in various utility routines that format modules may want to call.
1180             #
1181             # These go into package bp_util, not bib!
1182             #
1183              
1184             require "${glb_bpprefix}p-utils.pl";
1185             # loads:
1186             # bp_util'mname_to_canon
1187             # bp_util'name_to_canon
1188             # bp_util'canon_to_name
1189             # bp_util'parsedate
1190              
1191             ##################
1192             #
1193             # Set the default format and clear errors.
1194             #
1195              
1196              
1197             if (defined $main'bibpackage_do_not_load_defaults) {
1198             # special trickery for debugging and profiling.
1199             $main'bibpackage_do_not_load_defaults = 1; # stop one-use warning
1200             &debugs("bp package loaded without defaults", 65536);
1201             } else {
1202             &format("auto") || die &goterror("Could not load default format.", "package");
1203             &clear;
1204             #&check_consist;
1205             &debugs("bp package loaded with defaults", 65536);
1206             }
1207              
1208             #######################
1209             # end of package
1210             #######################
1211              
1212             1;