| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# |
|
2
|
|
|
|
|
|
|
# bibliography package for Perl |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# utility subroutines |
|
5
|
|
|
|
|
|
|
# |
|
6
|
|
|
|
|
|
|
# Dana Jacobsen (dana@acm.org) |
|
7
|
|
|
|
|
|
|
# 11 January 1995 |
|
8
|
|
|
|
|
|
|
# |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
package bp_util; |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
###### |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
$opt_complex = 1; |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
# The global key registry. |
|
17
|
|
|
|
|
|
|
%glb_keyreg = (); |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# |
|
20
|
|
|
|
|
|
|
# mname_to_canon takes a name string and returns it back as a Canonical name. |
|
21
|
|
|
|
|
|
|
# |
|
22
|
|
|
|
|
|
|
# Example input: |
|
23
|
|
|
|
|
|
|
# |
|
24
|
|
|
|
|
|
|
# John von Jones, Jr., Ed Krol, Ludwig von Beethoven |
|
25
|
|
|
|
|
|
|
# |
|
26
|
|
|
|
|
|
|
# output: |
|
27
|
|
|
|
|
|
|
# |
|
28
|
|
|
|
|
|
|
# Jones,von,John,Jr./Krol,Ed,/Beethoven,von,Ludwig, |
|
29
|
|
|
|
|
|
|
# |
|
30
|
|
|
|
|
|
|
# (the actual seperators are $cs_sep for '/' and $cs_sep2 for ',') |
|
31
|
|
|
|
|
|
|
# |
|
32
|
|
|
|
|
|
|
# This is a total heuristic hack, and if you know where names are split, |
|
33
|
|
|
|
|
|
|
# use multiple calls to name_to_canon instead. Use this routine if you |
|
34
|
|
|
|
|
|
|
# expect the input to be some sort of free-form such that you can't |
|
35
|
|
|
|
|
|
|
# easily seperate the names yourself. |
|
36
|
|
|
|
|
|
|
# |
|
37
|
|
|
|
|
|
|
# This routine assumes there can be multiple authors per line, seperated by |
|
38
|
|
|
|
|
|
|
# "and" or commas, and it's going to try to guess how to break them up, |
|
39
|
|
|
|
|
|
|
# given that it can get "name1, name2, jr, name3" as a 3 name string with |
|
40
|
|
|
|
|
|
|
# "name2, jr" as the second name. This method precludes the ability to |
|
41
|
|
|
|
|
|
|
# also correctly parse "last, first" format strings. If that is the format |
|
42
|
|
|
|
|
|
|
# your string is in, call the function with a "1" as the second argument. |
|
43
|
|
|
|
|
|
|
# |
|
44
|
|
|
|
|
|
|
# Note that no-break-space ("tie", ~ in TeX, \0 in troff) is \240. |
|
45
|
|
|
|
|
|
|
# |
|
46
|
|
|
|
|
|
|
sub mname_to_canon { |
|
47
|
0
|
|
|
0
|
|
0
|
local($allnames, $revauthor) = @_; |
|
48
|
0
|
|
|
|
|
0
|
local($firstn, $vonn, $lastn, $jrn); |
|
49
|
0
|
|
|
|
|
0
|
local(@names, $name, $oname, $nname, $rest); |
|
50
|
0
|
|
|
|
|
0
|
local(@cnames) = (); |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# Squeeze all spaces into one space. |
|
53
|
0
|
|
|
|
|
0
|
$allnames =~ s/\s+/ /g; |
|
54
|
|
|
|
|
|
|
# remove any beginning and trailing ands. |
|
55
|
0
|
|
|
|
|
0
|
$allnames =~ s/^and //; |
|
56
|
0
|
|
|
|
|
0
|
$allnames =~ s/ and$//; |
|
57
|
|
|
|
|
|
|
|
|
58
|
0
|
|
|
|
|
0
|
@names = split(/,? and /, $allnames); |
|
59
|
0
|
|
|
|
|
0
|
while (@names) { |
|
60
|
0
|
|
|
|
|
0
|
$oname = $name = shift @names; |
|
61
|
0
|
|
|
|
|
0
|
$firstn = $vonn = $lastn = $jrn = ''; |
|
62
|
|
|
|
|
|
|
# name has no spaces at beginning or end |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# squeeze all spaces around commas. They aren't telling us anything that |
|
65
|
|
|
|
|
|
|
# we can rely on, and it simplifies matching. Also combine them. |
|
66
|
0
|
|
|
|
|
0
|
$name =~ s/,+/,/g; |
|
67
|
0
|
|
|
|
|
0
|
$name =~ s/ ,/,/g; |
|
68
|
0
|
|
|
|
|
0
|
$name =~ s/, /,/g; |
|
69
|
|
|
|
|
|
|
|
|
70
|
0
|
0
|
0
|
|
|
0
|
if ( $revauthor && ($name =~ /,/) ) { |
|
71
|
0
|
0
|
|
|
|
0
|
if ($name =~ s/[, ]+([sj]r\.?|I+)$//i) { |
|
72
|
0
|
|
|
|
|
0
|
$jrn = ",$1"; |
|
73
|
|
|
|
|
|
|
} |
|
74
|
0
|
|
|
|
|
0
|
$name =~ s/^(.*),(.*)/$2 $1$jrn/g; |
|
75
|
|
|
|
|
|
|
# name has no spaces at beg or end |
|
76
|
|
|
|
|
|
|
} |
|
77
|
|
|
|
|
|
|
|
|
78
|
0
|
|
|
|
|
0
|
$name =~ s/[ \240]+([sj]r\.?|\(?edi?t?o?r?s?\.?\)?|I+)(,|$)/,$1/i; |
|
79
|
0
|
|
|
|
|
0
|
($nname, $rest, $jrn) = split(/,([^\240])/, $name, 2); |
|
80
|
0
|
0
|
|
|
|
0
|
$jrn = (defined $jrn) ? "$rest$jrn" : ''; |
|
81
|
|
|
|
|
|
|
#$jrn =~ s/,+$//; |
|
82
|
|
|
|
|
|
|
# nname has no spaces at beg or end. |
|
83
|
|
|
|
|
|
|
# jrn has no spaces at beg or end. |
|
84
|
0
|
0
|
|
|
|
0
|
if ($jrn =~ / /) { |
|
85
|
0
|
|
|
|
|
0
|
($jrn, $rest) = $jrn =~ /([sj]r\.?|\(?edi?t?o?r?s?\.?\)?|I+)?,?(.*)$/i; |
|
86
|
0
|
0
|
|
|
|
0
|
unshift(@names, $rest) if defined $rest; |
|
87
|
0
|
0
|
|
|
|
0
|
$jrn = '' unless defined $jrn; |
|
88
|
|
|
|
|
|
|
} |
|
89
|
0
|
|
|
|
|
0
|
($firstn) = $nname =~ /^((\S* )*)/; |
|
90
|
0
|
|
|
|
|
0
|
$nname = substr($nname, length($firstn)); |
|
91
|
|
|
|
|
|
|
# nname has no spaces at beg or end. |
|
92
|
0
|
|
|
|
|
0
|
$lastn = $nname; |
|
93
|
0
|
|
|
|
|
0
|
$lastn =~ s/\240+/ /g; |
|
94
|
0
|
|
|
|
|
0
|
$firstn =~ s/\240+/ /g; |
|
95
|
0
|
|
|
|
|
0
|
$jrn =~ s/\240+/ /g; |
|
96
|
0
|
|
|
|
|
0
|
while ($firstn =~ / ([a-z]+ )$/) { |
|
97
|
0
|
|
|
|
|
0
|
$rest = $1; |
|
98
|
0
|
|
|
|
|
0
|
substr($vonn, 0, 0) = $rest; |
|
99
|
|
|
|
|
|
|
# XXXXX removed " - 1" from position argument |
|
100
|
0
|
|
|
|
|
0
|
substr($firstn, length($firstn) - length($rest)) = ''; |
|
101
|
|
|
|
|
|
|
} |
|
102
|
0
|
|
|
|
|
0
|
while ($lastn =~ /^([a-z]+ )/) { |
|
103
|
0
|
|
|
|
|
0
|
$rest = $1; |
|
104
|
0
|
|
|
|
|
0
|
$vonn .= $rest; |
|
105
|
0
|
|
|
|
|
0
|
$lastn = substr($lastn, length($rest)); |
|
106
|
|
|
|
|
|
|
} |
|
107
|
0
|
|
|
|
|
0
|
$vonn =~ s/\s+$//; |
|
108
|
0
|
|
|
|
|
0
|
$firstn =~ s/\s+$//; |
|
109
|
|
|
|
|
|
|
#print STDERR ":$vonn:$lastn:$firstn:$jrn:\n"; |
|
110
|
|
|
|
|
|
|
|
|
111
|
0
|
0
|
|
|
|
0
|
if ($jrn) { |
|
112
|
0
|
0
|
|
|
|
0
|
if ($jrn =~ /^(et\.? ?al\.?)|(others)$/i) { |
|
113
|
0
|
|
|
|
|
0
|
$jrn = ''; |
|
114
|
0
|
|
|
|
|
0
|
unshift(@names, "et al."); |
|
115
|
|
|
|
|
|
|
} |
|
116
|
0
|
0
|
|
|
|
0
|
if ($jrn =~ /^inc[\.]?$/i) { |
|
117
|
0
|
|
|
|
|
0
|
$lastn .= ", " . $jrn; |
|
118
|
0
|
|
|
|
|
0
|
$jrn = ''; |
|
119
|
|
|
|
|
|
|
} |
|
120
|
|
|
|
|
|
|
} |
|
121
|
0
|
0
|
|
|
|
0
|
if ($lastn =~ /^(et ?al)|(others)$/i) { |
|
122
|
0
|
|
|
|
|
0
|
$lastn = "et al."; |
|
123
|
|
|
|
|
|
|
} |
|
124
|
|
|
|
|
|
|
|
|
125
|
0
|
|
|
|
|
0
|
push( @cnames, join($bib'cs_sep2, $lastn, $vonn, $firstn, $jrn) ); |
|
126
|
|
|
|
|
|
|
} |
|
127
|
|
|
|
|
|
|
|
|
128
|
0
|
|
|
|
|
0
|
$name = join( $bib'cs_sep, @cnames ); |
|
129
|
0
|
|
|
|
|
0
|
$name =~ s/\s+$//; |
|
130
|
0
|
|
|
|
|
0
|
$name =~ s/\s+/ /g; |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
# remove any spaces before and after parts of names. |
|
133
|
0
|
|
|
|
|
0
|
1 while $name =~ s/ ${bib'cs_sep2}/${bib'cs_sep2}/go; |
|
134
|
0
|
|
|
|
|
0
|
1 while $name =~ s/${bib'cs_sep2} /${bib'cs_sep2}/go; |
|
135
|
|
|
|
|
|
|
|
|
136
|
0
|
|
|
|
|
0
|
$name; |
|
137
|
|
|
|
|
|
|
} |
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
######### |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# |
|
142
|
|
|
|
|
|
|
# name_to_canon takes a _single_ name and returns it back as a Canonical name. |
|
143
|
|
|
|
|
|
|
# |
|
144
|
|
|
|
|
|
|
# This will be faster than mname_to_canon. I also wrote it for bp, and |
|
145
|
|
|
|
|
|
|
# mname_to_canon is full of weird TeX things from r2b. |
|
146
|
|
|
|
|
|
|
# |
|
147
|
|
|
|
|
|
|
# Note that there are a few differences between the two. Notably, that |
|
148
|
|
|
|
|
|
|
# we only break out a von if it is space seperated -- a nbsp (tie) will |
|
149
|
|
|
|
|
|
|
# prevent us from breaking it. Note that nbsp => \240. |
|
150
|
|
|
|
|
|
|
# |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
sub name_to_canon { |
|
153
|
0
|
|
|
0
|
|
0
|
local($name, $revauthor) = @_; |
|
154
|
0
|
|
|
|
|
0
|
local($first, $last, $von, $jrn); |
|
155
|
|
|
|
|
|
|
|
|
156
|
0
|
0
|
|
|
|
0
|
&bib'panic("name_to_canon called with no arguments") unless defined $name; |
|
157
|
|
|
|
|
|
|
|
|
158
|
0
|
|
|
|
|
0
|
$name =~ s/\s+/ /g; |
|
159
|
0
|
|
|
|
|
0
|
$name =~ s/ $//; |
|
160
|
0
|
|
|
|
|
0
|
$von = ''; $jrn = ''; |
|
|
0
|
|
|
|
|
0
|
|
|
161
|
|
|
|
|
|
|
|
|
162
|
0
|
0
|
|
|
|
0
|
if ($name =~ s/[, ]+([sj]r\.?|I+)$//i) { |
|
163
|
0
|
|
|
|
|
0
|
$jrn = $1; |
|
164
|
|
|
|
|
|
|
} |
|
165
|
|
|
|
|
|
|
# name has no space at end |
|
166
|
|
|
|
|
|
|
# jrn has no space at beg or end |
|
167
|
0
|
0
|
0
|
|
|
0
|
if ( $revauthor && ($name =~ /,/) ) { |
|
168
|
0
|
|
|
|
|
0
|
$name =~ s/^(.*)\s*,\s*(.*)/$2 $1/g; |
|
169
|
|
|
|
|
|
|
} |
|
170
|
|
|
|
|
|
|
# strip off Jr., but leave "Hunt,\0Jr." alone. |
|
171
|
0
|
0
|
0
|
|
|
0
|
if (($name =~ /,/) && ($name !~ /,\240/) ) { |
|
172
|
|
|
|
|
|
|
# XXXXX Check the logic here |
|
173
|
0
|
0
|
|
|
|
0
|
if (!$revauthor) { |
|
174
|
0
|
0
|
|
|
|
0
|
if ($jrn) { |
|
175
|
|
|
|
|
|
|
# possibly reversed? |
|
176
|
0
|
|
|
|
|
0
|
local($newname) = &name_to_canon($name, 'reverse'); |
|
177
|
0
|
0
|
|
|
|
0
|
if (defined $newname) { |
|
178
|
0
|
|
|
|
|
0
|
&bib'gotwarn("Names are in reverse order?"); |
|
179
|
0
|
|
|
|
|
0
|
return $newname; |
|
180
|
|
|
|
|
|
|
} else { |
|
181
|
0
|
|
|
|
|
0
|
&bib'goterror("name_to_canon already got jr!"); |
|
182
|
|
|
|
|
|
|
} |
|
183
|
|
|
|
|
|
|
} else { |
|
184
|
0
|
0
|
|
|
|
0
|
&bib'goterror("Names seem to be reversed!") if $jrn; |
|
185
|
|
|
|
|
|
|
} |
|
186
|
|
|
|
|
|
|
} |
|
187
|
0
|
|
|
|
|
0
|
($name, $jrn) = split(/ ?, ?/, $name, 2); |
|
188
|
|
|
|
|
|
|
} |
|
189
|
0
|
0
|
|
|
|
0
|
if ($name =~ / /) { |
|
190
|
0
|
|
|
|
|
0
|
($first, $last) = $name =~ /(.*) (\S*)$/; |
|
191
|
|
|
|
|
|
|
} else { |
|
192
|
0
|
|
|
|
|
0
|
$first = ''; |
|
193
|
0
|
|
|
|
|
0
|
$last = $name; |
|
194
|
|
|
|
|
|
|
} |
|
195
|
0
|
0
|
|
|
|
0
|
if ($first =~ / ([a-z].*)$/) { |
|
196
|
0
|
|
|
|
|
0
|
$von = $1; |
|
197
|
0
|
|
|
|
|
0
|
$von =~ s/\240/ /g; |
|
198
|
0
|
|
|
|
|
0
|
substr($first, length($first)-length($von)-1) = ''; |
|
199
|
|
|
|
|
|
|
#$first =~ s/ $von//; |
|
200
|
|
|
|
|
|
|
} |
|
201
|
0
|
|
|
|
|
0
|
while ($last =~ /^([a-z]+)\240/) { |
|
202
|
0
|
|
|
|
|
0
|
$von .= " $1"; |
|
203
|
0
|
|
|
|
|
0
|
substr($last, 0, length($1)+1) = ''; |
|
204
|
|
|
|
|
|
|
} |
|
205
|
0
|
|
|
|
|
0
|
$von =~ s/^ //; |
|
206
|
0
|
|
|
|
|
0
|
$last =~ s/\240/ /g; |
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
#print STDERR ":$last:$von:$first:$jrn:\n"; |
|
209
|
|
|
|
|
|
|
|
|
210
|
0
|
|
|
|
|
0
|
$name = join( $bib'cs_sep2, $last, $von, $first, $jrn); |
|
211
|
|
|
|
|
|
|
|
|
212
|
0
|
|
|
|
|
0
|
$name =~ s/\s+$//; |
|
213
|
0
|
|
|
|
|
0
|
$name =~ s/\s+/ /g; |
|
214
|
|
|
|
|
|
|
# remove spaces before and after seperators. |
|
215
|
0
|
|
|
|
|
0
|
1 while $name =~ s/ ${bib'cs_sep2}/${bib'cs_sep2}/go; |
|
216
|
0
|
|
|
|
|
0
|
1 while $name =~ s/${bib'cs_sep2} /${bib'cs_sep2}/go; |
|
217
|
|
|
|
|
|
|
|
|
218
|
0
|
0
|
|
|
|
0
|
if ($opt_complex > 1) { |
|
219
|
0
|
|
|
|
|
0
|
($last, $von, $first, $jrn) = split($bib'cs_sep2, $name); |
|
220
|
|
|
|
|
|
|
# Look for corporations |
|
221
|
0
|
0
|
|
|
|
0
|
if ($jrn =~ /^Inc\.$/i) { |
|
222
|
0
|
|
|
|
|
0
|
$jrn = ''; |
|
223
|
0
|
|
|
|
|
0
|
$last = $last . ", Inc."; |
|
224
|
|
|
|
|
|
|
} |
|
225
|
|
|
|
|
|
|
# put it back together |
|
226
|
0
|
|
|
|
|
0
|
$name = join( $bib'cs_sep2, $last, $von, $first, $jrn); |
|
227
|
|
|
|
|
|
|
} |
|
228
|
|
|
|
|
|
|
|
|
229
|
0
|
|
|
|
|
0
|
$name; |
|
230
|
|
|
|
|
|
|
} |
|
231
|
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
# This routine turns a name string (possibly containing multiple names) in |
|
233
|
|
|
|
|
|
|
# canon format into a string suitable for output. |
|
234
|
|
|
|
|
|
|
# |
|
235
|
|
|
|
|
|
|
# The styles supported are: |
|
236
|
|
|
|
|
|
|
# |
|
237
|
|
|
|
|
|
|
# bibtex First von Last [or] von Last, First [or] von Last, Jr, First |
|
238
|
|
|
|
|
|
|
# |
|
239
|
|
|
|
|
|
|
# plain First von Last, Jr |
|
240
|
|
|
|
|
|
|
# |
|
241
|
|
|
|
|
|
|
# reverse von Last, First, Jr |
|
242
|
|
|
|
|
|
|
# |
|
243
|
|
|
|
|
|
|
# reverse2 Last, First von, Jr |
|
244
|
|
|
|
|
|
|
# |
|
245
|
|
|
|
|
|
|
# lname1 von Last, Jr, First [for first author] |
|
246
|
|
|
|
|
|
|
# First von Last [for subsequesent authors] |
|
247
|
|
|
|
|
|
|
# |
|
248
|
|
|
|
|
|
|
# XXXXX |
|
249
|
|
|
|
|
|
|
# |
|
250
|
|
|
|
|
|
|
# What we should do instead is have a more general solution. We could specify |
|
251
|
|
|
|
|
|
|
# names in the above sort of format, and have it parse that. But then how do |
|
252
|
|
|
|
|
|
|
# we handle BibTeX, which will make decisions based on what fields exist? But |
|
253
|
|
|
|
|
|
|
# for most of these, something like "FvL,J" or "vL,F,J" or "L,Fv,J" would work. |
|
254
|
|
|
|
|
|
|
# |
|
255
|
|
|
|
|
|
|
# Also, we really need a generic output form, that handles more subtle |
|
256
|
|
|
|
|
|
|
# variations, like when to put "et al." in place of 150 names, and a different |
|
257
|
|
|
|
|
|
|
# separator for the last name (", and " instead of ", "), initials, and so on. |
|
258
|
|
|
|
|
|
|
# |
|
259
|
|
|
|
|
|
|
# XXXXX Check out bibtex parsing. We look for a space, but we've tied all |
|
260
|
|
|
|
|
|
|
# spaces already! |
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
sub canon_to_name { |
|
263
|
120
|
|
|
120
|
|
249
|
local($cname, $how) = @_; |
|
264
|
120
|
|
|
|
|
188
|
local(@names); |
|
265
|
120
|
|
|
|
|
127
|
local($name); |
|
266
|
120
|
|
|
|
|
174
|
local($n, $von, $last, $jr, $first); |
|
267
|
120
|
|
|
|
|
180
|
local($namenum) = 0; |
|
268
|
|
|
|
|
|
|
|
|
269
|
120
|
50
|
|
|
|
232
|
&bib'panic("canon_to_name called with no arguments") unless defined $cname; |
|
270
|
120
|
50
|
|
|
|
194
|
$how = 'bibtex' unless defined $how; |
|
271
|
|
|
|
|
|
|
|
|
272
|
120
|
|
|
|
|
407
|
foreach $name ( split(/$bib'cs_sep/o, $cname) ) { |
|
273
|
304
|
|
|
|
|
318
|
$namenum++; |
|
274
|
304
|
|
|
|
|
878
|
($last, $von, $first, $jr) = split(/$bib'cs_sep2/o, $name, 4); |
|
275
|
304
|
|
|
|
|
468
|
$last =~ s/ /\240/g; |
|
276
|
304
|
|
|
|
|
333
|
$von =~ s/ /\240/g; |
|
277
|
304
|
50
|
|
|
|
1005
|
if ($how =~ /^bibtex/) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
# Turn ties back into spaces. |
|
279
|
0
|
|
|
|
|
0
|
$last =~ s/([^,])\240/$1 /g; |
|
280
|
0
|
|
|
|
|
0
|
$von =~ s/\240([a-z])/ $1/g; |
|
281
|
|
|
|
|
|
|
# Do the minimal amount of commas |
|
282
|
0
|
0
|
0
|
|
|
0
|
if ($jr) { |
|
|
|
0
|
|
|
|
|
|
|
283
|
0
|
|
|
|
|
0
|
$n = $von . ' ' . $last . ', ' . $jr . ', ' . $first; |
|
284
|
|
|
|
|
|
|
} elsif ( ($last =~ /\S\s+\S/) && ($last !~ /^{.*}$/) ) { |
|
285
|
0
|
|
|
|
|
0
|
$n = $von . ' ' . $last . ', ' . $first; |
|
286
|
|
|
|
|
|
|
} else { |
|
287
|
0
|
|
|
|
|
0
|
$n = join(' ', $first, $von, $last); |
|
288
|
|
|
|
|
|
|
} |
|
289
|
|
|
|
|
|
|
} elsif ($how =~ /^plain/) { |
|
290
|
|
|
|
|
|
|
# plain: "First von Last, Jr" for each name |
|
291
|
304
|
|
|
|
|
464
|
$n = $first; |
|
292
|
304
|
100
|
|
|
|
531
|
$n .= " $von " if $von; |
|
293
|
304
|
100
|
|
|
|
689
|
$n .= " $last" if $last; |
|
294
|
304
|
50
|
|
|
|
557
|
$n .= ", $jr" if $jr; |
|
295
|
|
|
|
|
|
|
} elsif ($how =~ /^reverse2/) { |
|
296
|
|
|
|
|
|
|
# This is "Last, First von, Jr." order. |
|
297
|
0
|
|
|
|
|
0
|
$n = "$last"; |
|
298
|
0
|
0
|
0
|
|
|
0
|
$n .= "," if ($first || $von || $jr); |
|
|
|
|
0
|
|
|
|
|
|
299
|
0
|
0
|
|
|
|
0
|
$n .= " $first" if $first; |
|
300
|
0
|
0
|
|
|
|
0
|
$n .= " $von" if $von; |
|
301
|
0
|
0
|
|
|
|
0
|
$n .= ", $jr" if $jr; |
|
302
|
|
|
|
|
|
|
} elsif ($how =~ /^reverse/) { |
|
303
|
|
|
|
|
|
|
# This is "von Last, First, Jr." order. |
|
304
|
0
|
|
|
|
|
0
|
$n = "$von $last"; |
|
305
|
0
|
0
|
0
|
|
|
0
|
$n .= ", $first" if ($first || $jr); |
|
306
|
0
|
0
|
|
|
|
0
|
$n .= ", $jr" if $jr; |
|
307
|
|
|
|
|
|
|
} elsif ($how =~ /^lname1/) { |
|
308
|
|
|
|
|
|
|
# lname1 : First author has last name first, the rest are in normal order. |
|
309
|
|
|
|
|
|
|
# Personally I hate this style, but its common in ecology. |
|
310
|
0
|
0
|
|
|
|
0
|
$last .= ", $jr" if $jr; |
|
311
|
0
|
0
|
|
|
|
0
|
if ($namenum == 1) { |
|
312
|
0
|
0
|
|
|
|
0
|
$last = join(' ', $von, $last) if ($von); |
|
313
|
0
|
0
|
|
|
|
0
|
if ($first) { |
|
314
|
0
|
|
|
|
|
0
|
$n = join(', ', $last, $first); |
|
315
|
|
|
|
|
|
|
} else { |
|
316
|
0
|
|
|
|
|
0
|
$n = $last; |
|
317
|
|
|
|
|
|
|
} |
|
318
|
|
|
|
|
|
|
} else { |
|
319
|
0
|
|
|
|
|
0
|
$n = join(' ', $first, $von, $last); |
|
320
|
|
|
|
|
|
|
} |
|
321
|
|
|
|
|
|
|
# unknown name style |
|
322
|
|
|
|
|
|
|
} else { |
|
323
|
0
|
|
|
|
|
0
|
return &bib'goterror("canon_to_name doesn't know form: $how"); |
|
324
|
|
|
|
|
|
|
} |
|
325
|
304
|
|
|
|
|
381
|
$n =~ s/ \240/ /g; |
|
326
|
304
|
|
|
|
|
463
|
$n =~ s/^\s+//; |
|
327
|
304
|
|
|
|
|
671
|
$n =~ s/\s+$//; |
|
328
|
304
|
|
|
|
|
927
|
$n =~ s/\s+/ /g; |
|
329
|
304
|
|
|
|
|
688
|
push(@names, $n); |
|
330
|
|
|
|
|
|
|
} |
|
331
|
|
|
|
|
|
|
|
|
332
|
120
|
50
|
|
|
|
267
|
if (wantarray) { |
|
333
|
0
|
|
|
|
|
0
|
@names; |
|
334
|
|
|
|
|
|
|
} else { |
|
335
|
|
|
|
|
|
|
# They want the complete string accoring to the style they asked for. |
|
336
|
120
|
50
|
|
|
|
572
|
if ($how =~ /lname1|plain/) { |
|
337
|
120
|
100
|
|
|
|
316
|
if (@names <= 2) { |
|
338
|
64
|
|
|
|
|
142
|
$n = join(' and ', @names); |
|
339
|
|
|
|
|
|
|
} else { |
|
340
|
56
|
|
|
|
|
103
|
$lname = pop(@names); |
|
341
|
56
|
|
|
|
|
206
|
$n = join(', ', @names) . ', and ' . $lname; |
|
342
|
|
|
|
|
|
|
} |
|
343
|
|
|
|
|
|
|
} else { |
|
344
|
0
|
|
|
|
|
0
|
$n = join(' and ', @names); |
|
345
|
|
|
|
|
|
|
} |
|
346
|
120
|
|
|
|
|
710
|
$n; |
|
347
|
|
|
|
|
|
|
} |
|
348
|
|
|
|
|
|
|
} |
|
349
|
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
# XXXXX Obsolete? |
|
351
|
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
sub parsename { |
|
353
|
0
|
|
|
0
|
|
0
|
local($name, $how) = @_; |
|
354
|
|
|
|
|
|
|
|
|
355
|
0
|
|
|
|
|
0
|
&canon_to_name( &mname_to_canon($name), $how); |
|
356
|
|
|
|
|
|
|
} |
|
357
|
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
######### |
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
# |
|
362
|
|
|
|
|
|
|
# parsedate takes a date and returns a list of month, year. |
|
363
|
|
|
|
|
|
|
# |
|
364
|
|
|
|
|
|
|
# taken from r2b |
|
365
|
|
|
|
|
|
|
# |
|
366
|
|
|
|
|
|
|
# date looks like month dec year |
|
367
|
|
|
|
|
|
|
# -------------------------------- ------------------- -- --------------- |
|
368
|
|
|
|
|
|
|
# 1984 84 1984 |
|
369
|
|
|
|
|
|
|
# 1974-1975 74 1974-1975 |
|
370
|
|
|
|
|
|
|
# August 1984 aug 84 1984 |
|
371
|
|
|
|
|
|
|
# May 1984 May 1984 may 84 1984 |
|
372
|
|
|
|
|
|
|
# 1976 November nov 76 1976 |
|
373
|
|
|
|
|
|
|
# 1976 November 1976 nov 76 1976 |
|
374
|
|
|
|
|
|
|
# 21 August 1984 {21 August} 84 1984 |
|
375
|
|
|
|
|
|
|
# August 18-21, 1984 {August 18-21} 84 1984 |
|
376
|
|
|
|
|
|
|
# 18-21 August 1991 {18-21 August} 91 1991 |
|
377
|
|
|
|
|
|
|
# July 31-August 4, 1984 1984 {July 31-August 4} 84 1984 |
|
378
|
|
|
|
|
|
|
# July-August 1980 {July-August} 80 1980 |
|
379
|
|
|
|
|
|
|
# February 1984 (revised May 1991) feb 84 1984 |
|
380
|
|
|
|
|
|
|
# Winter 1990 {Winter} 90 1990 |
|
381
|
|
|
|
|
|
|
# 1988 (in press) 88 1988 (in press) |
|
382
|
|
|
|
|
|
|
# to appear ?? to appear |
|
383
|
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
sub parsedate { |
|
385
|
0
|
|
|
0
|
|
0
|
local($date) = @_; |
|
386
|
0
|
|
|
|
|
0
|
local($year) = undef; |
|
387
|
0
|
|
|
|
|
0
|
local($month); |
|
388
|
0
|
|
|
|
|
0
|
local($old_date) = $date; |
|
389
|
|
|
|
|
|
|
|
|
390
|
0
|
0
|
|
|
|
0
|
return (undef, undef) unless defined $date; |
|
391
|
|
|
|
|
|
|
|
|
392
|
0
|
|
|
|
|
0
|
$date =~ s/(\S+)\s+(\d+)\s+\1\s+\2/$1 $2/; # handle duplicate dates |
|
393
|
0
|
|
|
|
|
0
|
$date =~ s/^\s*(\d\d\d+)\s+(\S+)/$2 $1/; # handle 1976 November |
|
394
|
0
|
|
|
|
|
0
|
while ($date =~ /\s*[(]?((\d\d\d\d[-\/])?\d\d\d\d)[).]?\s*(\(.*\))?$/) { |
|
395
|
0
|
|
|
|
|
0
|
$year = $1; |
|
396
|
0
|
|
|
|
|
0
|
$date =~ s/,?\s*[(]?(\d\d\d\d[-\/])?\d\d\d\d[).]?\s*(\(.*\))?$//; |
|
397
|
|
|
|
|
|
|
} |
|
398
|
|
|
|
|
|
|
|
|
399
|
0
|
|
|
|
|
0
|
$month = &canon_month($date); |
|
400
|
|
|
|
|
|
|
|
|
401
|
0
|
0
|
0
|
|
|
0
|
if ($month !~ /\S/) { |
|
|
|
0
|
|
|
|
|
|
|
402
|
0
|
|
|
|
|
0
|
undef $month; |
|
403
|
|
|
|
|
|
|
} elsif ( (!defined $year) && ($month eq $date) ) { |
|
404
|
0
|
|
|
|
|
0
|
$year = $old_date; |
|
405
|
0
|
|
|
|
|
0
|
undef $month; |
|
406
|
|
|
|
|
|
|
} |
|
407
|
0
|
|
|
|
|
0
|
($month, $year); |
|
408
|
|
|
|
|
|
|
} |
|
409
|
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
%month_table = ( |
|
411
|
|
|
|
|
|
|
'apr', 'April', |
|
412
|
|
|
|
|
|
|
'aug', 'August', |
|
413
|
|
|
|
|
|
|
'dec', 'December', |
|
414
|
|
|
|
|
|
|
'feb', 'February', |
|
415
|
|
|
|
|
|
|
'jan', 'January', |
|
416
|
|
|
|
|
|
|
'jul', 'July', |
|
417
|
|
|
|
|
|
|
'jun', 'June', |
|
418
|
|
|
|
|
|
|
'mar', 'March', |
|
419
|
|
|
|
|
|
|
'may', 'May', |
|
420
|
|
|
|
|
|
|
'nov', 'November', |
|
421
|
|
|
|
|
|
|
'oct', 'October', |
|
422
|
|
|
|
|
|
|
'sep', 'September', |
|
423
|
|
|
|
|
|
|
); |
|
424
|
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
sub canon_month { |
|
426
|
52
|
|
|
52
|
|
113
|
local($month) = @_; |
|
427
|
|
|
|
|
|
|
|
|
428
|
52
|
100
|
|
|
|
229
|
return $month if $month =~ /[\d\/\-]/; |
|
429
|
|
|
|
|
|
|
|
|
430
|
30
|
|
|
|
|
83
|
local($canm) = substr($month, 0, 3); |
|
431
|
|
|
|
|
|
|
|
|
432
|
30
|
|
|
|
|
62
|
$canm =~ tr/A-Z/a-z/; |
|
433
|
|
|
|
|
|
|
|
|
434
|
30
|
50
|
|
|
|
109
|
return $month unless defined $month_table{$canm}; |
|
435
|
|
|
|
|
|
|
|
|
436
|
30
|
|
|
|
|
248
|
$canm; |
|
437
|
|
|
|
|
|
|
} |
|
438
|
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
sub output_month { |
|
440
|
0
|
|
|
0
|
|
|
local($canm, $how) = @_; |
|
441
|
0
|
|
|
|
|
|
local($outm) = $month_table{$canm}; |
|
442
|
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
# we don't know what they have |
|
444
|
0
|
0
|
|
|
|
|
return $canm unless defined $outm; |
|
445
|
|
|
|
|
|
|
|
|
446
|
0
|
0
|
0
|
|
|
|
if ( ($how eq 'short') && (length($outm) > 4) ) { |
|
447
|
0
|
|
|
|
|
|
substr($outm, 3) = '.'; |
|
448
|
|
|
|
|
|
|
} |
|
449
|
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
# 'long' format |
|
451
|
0
|
|
|
|
|
|
$outm; |
|
452
|
|
|
|
|
|
|
} |
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
sub output_date { |
|
455
|
0
|
|
|
0
|
|
|
local($mo, $yr, $how) = @_; |
|
456
|
0
|
|
|
|
|
|
local($date); |
|
457
|
|
|
|
|
|
|
|
|
458
|
0
|
0
|
|
|
|
|
$how = 'short' unless defined $how; |
|
459
|
|
|
|
|
|
|
|
|
460
|
0
|
0
|
|
|
|
|
if (defined $mo) { |
|
461
|
0
|
|
|
|
|
|
$mo = &bp_util'output_month($mo, $how); |
|
462
|
0
|
0
|
|
|
|
|
if (defined $yr) { |
|
463
|
0
|
|
|
|
|
|
$date = "$mo $yr"; |
|
464
|
|
|
|
|
|
|
} else { |
|
465
|
0
|
|
|
|
|
|
$date = $mo; |
|
466
|
|
|
|
|
|
|
} |
|
467
|
|
|
|
|
|
|
} else { |
|
468
|
0
|
0
|
|
|
|
|
$date = $yr if defined $yr; |
|
469
|
|
|
|
|
|
|
} |
|
470
|
|
|
|
|
|
|
|
|
471
|
0
|
|
|
|
|
|
$date; |
|
472
|
|
|
|
|
|
|
} |
|
473
|
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
# |
|
475
|
|
|
|
|
|
|
# Generates a key for a canonical record. |
|
476
|
|
|
|
|
|
|
# |
|
477
|
|
|
|
|
|
|
# XXXXX This should take an option string and parse it to generate a key. |
|
478
|
|
|
|
|
|
|
# |
|
479
|
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
sub genkey { |
|
481
|
0
|
|
|
0
|
|
|
local(%cent) = @_; |
|
482
|
0
|
|
|
|
|
|
local($key, $keytype, $sy); |
|
483
|
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
# first pick out the field we're going to use |
|
485
|
|
|
|
|
|
|
GETKEY: { |
|
486
|
0
|
|
|
|
|
|
defined $cent{'Authors'} && do |
|
487
|
0
|
0
|
|
|
|
|
{ $keytype = 'author'; $key = $cent{'Authors'}; last GETKEY; }; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
defined $cent{'CorpAuthor'} && do |
|
489
|
0
|
0
|
|
|
|
|
{ $keytype = 'org'; $key = $cent{'CorpAuthor'}; last GETKEY; }; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
defined $cent{'Editors'} && do |
|
491
|
0
|
0
|
|
|
|
|
{ $keytype = 'author'; $key = $cent{'Editors'}; last GETKEY; }; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
defined $cent{'Publisher'} && do |
|
493
|
0
|
0
|
|
|
|
|
{ $keytype = 'org'; $key = $cent{'Publisher'}; last GETKEY; }; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
defined $cent{'Organization'} && do |
|
495
|
0
|
0
|
|
|
|
|
{ $keytype = 'org'; $key = $cent{'Organization'}; last GETKEY; }; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
# nothing defined |
|
497
|
0
|
|
|
|
|
|
$keytype = 'text'; $key = "Anonymous"; |
|
|
0
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
} |
|
499
|
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
# next we want to reduce the name to a reasonable key |
|
501
|
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
#print STDERR "$key -> "; |
|
503
|
|
|
|
|
|
|
|
|
504
|
0
|
0
|
|
|
|
|
if ($keytype eq 'author') { |
|
|
|
0
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
# # turn "Stephen van Rensselaer, Jr." into "vanRensselaerJr". |
|
506
|
|
|
|
|
|
|
# #$key =~ s/^([^\/]*)\/([^\/]*)\/([^\/]*)\/([^\|]*).*/$2$1$4/; |
|
507
|
|
|
|
|
|
|
# # turn "Stephen van Rensselaer, Jr." into "Rensselaer" |
|
508
|
|
|
|
|
|
|
# #$key =~ s/^([^\/]*)\/.*/$1/; |
|
509
|
|
|
|
|
|
|
# Remove everything past the first seperator |
|
510
|
0
|
|
|
|
|
|
local($split_sep) = index($key, $bib'cs_sep2); |
|
511
|
0
|
0
|
|
|
|
|
substr($key, $split_sep) = '' if $split_sep >= $[; |
|
512
|
|
|
|
|
|
|
} elsif ($keytype eq 'org') { |
|
513
|
0
|
|
|
|
|
|
$key =~ s/^(\S*).*/$1/; |
|
514
|
|
|
|
|
|
|
} else { |
|
515
|
|
|
|
|
|
|
# text |
|
516
|
|
|
|
|
|
|
} |
|
517
|
|
|
|
|
|
|
#print STDERR "$key -> "; |
|
518
|
0
|
|
|
|
|
|
$key = &bib'nocharset($key); |
|
519
|
|
|
|
|
|
|
#print STDERR "$key -> "; |
|
520
|
0
|
|
|
|
|
|
$key =~ tr/A-Za-z0-9\/\-//cd; |
|
521
|
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
# reduce it to fit normal lengths |
|
523
|
0
|
0
|
|
|
|
|
substr($key, 14) = '' if length($key) > 14; |
|
524
|
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
# Now find the year |
|
526
|
0
|
0
|
0
|
|
|
|
if ( (defined $cent{'Year'}) && ($cent{'Year'} =~ /(\d\d\d\d)/) ) { |
|
|
|
0
|
0
|
|
|
|
|
|
527
|
0
|
|
|
|
|
|
$sy = $1; |
|
528
|
|
|
|
|
|
|
} elsif ( (defined $cent{'Month'}) && ($cent{'Month'} =~ /(\d\d\d\d)/) ) { |
|
529
|
0
|
|
|
|
|
|
$sy = $1; |
|
530
|
|
|
|
|
|
|
} else { |
|
531
|
0
|
|
|
|
|
|
$sy = "????"; |
|
532
|
|
|
|
|
|
|
} |
|
533
|
|
|
|
|
|
|
# We lop off the century part |
|
534
|
0
|
|
|
|
|
|
substr($sy, 0, 2) = ''; |
|
535
|
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
# and add on the shortyear to the end of our key |
|
537
|
0
|
|
|
|
|
|
$key .= $sy; |
|
538
|
|
|
|
|
|
|
|
|
539
|
0
|
|
|
|
|
|
$key; |
|
540
|
|
|
|
|
|
|
} |
|
541
|
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
# |
|
543
|
|
|
|
|
|
|
# Register a key in our global key registry, returning the possibly changed |
|
544
|
|
|
|
|
|
|
# key. All this does is maintain a registry of keys, and if there is already |
|
545
|
|
|
|
|
|
|
# a key that matches, it adds letters from a -> z -> aa -> az -> ba -> bz -> ... |
|
546
|
|
|
|
|
|
|
# to the end of the key. A format uses these routines with something like: |
|
547
|
|
|
|
|
|
|
# |
|
548
|
|
|
|
|
|
|
# $can{'CiteKey'} = &bp_util'genkey(%can) unless defined $can{'CiteKey'}; |
|
549
|
|
|
|
|
|
|
# $can{'CiteKey'} = &bp_util'regkey($can{'CiteKey'}); |
|
550
|
|
|
|
|
|
|
# |
|
551
|
|
|
|
|
|
|
# in it's fromcanon routines. This generates a key if necessary, and then |
|
552
|
|
|
|
|
|
|
# registers it. A format may wish to do its own key generation, or even |
|
553
|
|
|
|
|
|
|
# throw out the citekey it was given and make a new one, so generation and |
|
554
|
|
|
|
|
|
|
# registration are seperate routines. |
|
555
|
|
|
|
|
|
|
# |
|
556
|
|
|
|
|
|
|
# It is recommended that keys be registered here rather than in the format, as |
|
557
|
|
|
|
|
|
|
# we would like one registry even for multiple formats. |
|
558
|
|
|
|
|
|
|
# |
|
559
|
|
|
|
|
|
|
# XXXXX is this necessary? This goes to an output routine after all. As long |
|
560
|
|
|
|
|
|
|
# as they register them all, or none, do we care? |
|
561
|
|
|
|
|
|
|
# |
|
562
|
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
sub regkey { |
|
564
|
0
|
|
|
0
|
|
|
local($key) = @_; |
|
565
|
0
|
|
|
|
|
|
local($rkey, $nextkey, $rkeylen); |
|
566
|
|
|
|
|
|
|
|
|
567
|
0
|
|
|
|
|
|
$rkey = $key; |
|
568
|
0
|
|
|
|
|
|
$rkey =~ tr/A-Z/a-z/; |
|
569
|
0
|
|
|
|
|
|
$rkeylen = length($rkey); |
|
570
|
|
|
|
|
|
|
|
|
571
|
0
|
0
|
|
|
|
|
if (defined $glb_keyreg{$rkey}) { |
|
572
|
0
|
|
|
|
|
|
$nextkey = $key . 'a'; |
|
573
|
0
|
|
|
|
|
|
while (defined $glb_keyreg{$nextkey}) { |
|
574
|
|
|
|
|
|
|
# increment the characters after the key, 'z'+1 -> 'aa'. |
|
575
|
0
|
|
|
|
|
|
substr($nextkey, $rkeylen)++; |
|
576
|
|
|
|
|
|
|
} |
|
577
|
|
|
|
|
|
|
# going to put ourselves in $nextkey |
|
578
|
0
|
|
|
|
|
|
$glb_keyreg{$nextkey} = 1; |
|
579
|
|
|
|
|
|
|
# key has changed, so update it for the output. |
|
580
|
0
|
|
|
|
|
|
$key .= substr($nextkey, $rkeylen); |
|
581
|
|
|
|
|
|
|
} else { |
|
582
|
0
|
|
|
|
|
|
$glb_keyreg{$rkey} = 1; |
|
583
|
|
|
|
|
|
|
# key is unchanged |
|
584
|
|
|
|
|
|
|
} |
|
585
|
|
|
|
|
|
|
|
|
586
|
0
|
|
|
|
|
|
$key; |
|
587
|
|
|
|
|
|
|
} |
|
588
|
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
####################### |
|
590
|
|
|
|
|
|
|
# end of package |
|
591
|
|
|
|
|
|
|
####################### |
|
592
|
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
1; |