File Coverage

blib/lib/BibTeX/Parser/Author.pm

Criterion	Covered	Total	%
statement	107	154	69.4
branch	44	78	56.4
condition	2	18	11.1
subroutine	15	17	88.2
pod	7	7	100.0
total	175	274	63.8

line	stmt	bran	cond	sub	pod	time	code
1							package BibTeX::Parser::Author;
2							{
3							$BibTeX::Parser::Author::VERSION = '1.04';
4							}
5
6	18			18		140240	use warnings;
	18					51
	18					579
7	18			18		97	use strict;
	18					42
	18					338
8
9	18			18		922	use BibTeX::Parser;
	18					33
	18					540
10
11
12							use overload
13	18			18		22066	'""' => \&to_string;
	18					18298
	18					147
14
15
16
17							sub new {
18	68			68	1	19070	my $class = shift;
19
20	68	100				187	if (@_) {
21	65					184	my $self = [ $class->split(@_) ];
22	65					316	return bless $self, $class;
23							} else {
24	3					16	return bless [], $class;
25							}
26							}
27
28							sub _get_or_set_field {
29	406			406		737	my ($self, $field, $value) = @_;
30	406	100				707	if (defined $value) {
31	4					20	$self->[$field] = $value;
32							} else {
33	402					1269	return $self->[$field];
34							}
35							}
36
37
38							sub first {
39	140			140	1	22334	shift->_get_or_set_field(0, @_);
40							}
41
42
43							sub von {
44	86			86	1	196	shift->_get_or_set_field(1, @_);
45							}
46
47
48							sub last {
49	94			94	1	8661	shift->_get_or_set_field(2, @_);
50							}
51
52
53							sub jr {
54	86			86	1	204	shift->_get_or_set_field(3, @_);
55							}
56
57
58							# Take a string and create an array [first, von, last, jr]
59							sub split {
60	98			98	1	11355	my ($self_or_class, $name) = @_;
61
62							# remove whitespace at start and end of string
63	98					675	$name =~ s/^\s(.)\s*$/$1/s;
64
65
66
67	98	100				292	if (!length($name)) {
68	6					23	return (undef, undef, undef, undef);
69							}
70
71	92					262	my @comma_separated =
72							BibTeX::Parser::_split_braced_string($name,
73							'\s,\s');
74	92	50				244	if (scalar(@comma_separated) == 0) {
75							# Error?
76	0					0	return (undef, undef, undef, undef);
77							}
78
79	92					155	my $first=undef;
80	92					134	my $von=undef;
81	92					134	my $last=undef;
82	92					123	my $jr=undef;
83
84	92	100				197	if (scalar(@comma_separated) == 1) {
85							# First von Last form
86	65					200	my @tokens =
87							BibTeX::Parser::_split_braced_string($name, '\s+');
88	65	50				178	if (!scalar (@tokens)) {
89	0					0	return (undef, undef, undef, undef);
90							}
91	65					170	my ($start_von, $start_last) = _getStartVonLast (@tokens);
92	65	100				158	if ($start_von >0) {
93	55					166	$first = join(' ', splice(@tokens,0,$start_von));
94							}
95	65	100				168	if (($start_last-$start_von) >0) {
96	9					21	$von = join(' ', splice(@tokens,0,$start_last-$start_von));
97							}
98	65					141	$last = join(' ',@tokens);
99	65					364	return ($first, $von, $last, $jr);
100							}
101							# Now we work with von Last, [Jr,] First form
102	27	100				62	if (scalar @comma_separated == 2) { # no jr
103	21					64	my @tokens=
104							BibTeX::Parser::_split_braced_string($comma_separated[1], '\s+');
105	21					64	$first = join(' ', @tokens);
106							} else { # jr is present
107	6					18	my @tokens=
108							BibTeX::Parser::_split_braced_string($comma_separated[1], '\s+');
109	6					18	$jr = join(' ', @tokens);
110	6					15	@tokens=
111							BibTeX::Parser::_split_braced_string($comma_separated[2], '\s+');
112	6					15	$first = join(' ', @tokens);
113							}
114	27					135	my @tokens =
115							BibTeX::Parser::_split_braced_string($comma_separated[0], '\s+');
116	27					70	my $start_last = _getStartLast(@tokens);
117	27	100				63	if ($start_last > 0) {
118	10					27	$von=join(' ', splice(@tokens,0,$start_last));
119							}
120	27					53	$last = join(' ',@tokens);
121	27					141	return ($first, $von, $last, $jr);
122
123							}
124
125							# Return the index of the first von element and the first lastname
126							# element. If no von element, von=last
127
128							sub _getStartVonLast {
129	65			65		123	my $length=scalar(@_);
130	65	100				160	if ($length==1) {
131	8					22	return (0,0);
132							}
133	57					90	my $start_von=-1;
134	57					101	my $start_last=$length-1;
135	57					145	for (my $i=0; $i<$length; $i++) {
136	130	100				294	if (_is_von_token($_[$i])) {
137	14					22	$start_von=$i;
138	14					26	last;
139							}
140							}
141	57	100				138	if ($start_von== -1) { # no von part
142	43					126	return($length-1, $length-1);
143							}
144	14	100				30	if ($start_von== $length-1) { # all parts but last are upper case?
145	5					14	return($length-1, $length-1);
146							}
147	9					28	for (my $i=$start_von+1; $i<$length; $i++) {
148	13	100				33	if (!_is_von_token($_[$i])) {
149	9					16	$start_last=$i;
150	9					12	last;
151							}
152							}
153	9					22	return($start_von, $start_last);
154							}
155
156
157							# Return the index of the first lastname
158							# element provided no first name elements are present
159
160							sub _getStartLast {
161	27			27		50	my $length=scalar(@_);
162	27	100				66	if ($length==1) {
163	15					32	return 0;
164							}
165	12					34	my $start_last=$length-1;
166	12					33	for (my $i=0; $i<$length; $i++) {
167	22	100				46	if (!_is_von_token($_[$i])) {
168	12					15	$start_last=$i;
169	12					18	last;
170							}
171							}
172	12					22	return $start_last;
173							}
174
175
176							sub _split_name_parts {
177	0			0		0	my $name = shift;
178
179	0	0				0	if ( $name !~ /\{/ ) {
180	0					0	return split /\s+/, $name;
181							} else {
182	0					0	my @parts;
183	0					0	my $cur_token = '';
184	0					0	while ( scalar( $name =~ /\G ( [^\s\{]* ) ( \s+ \| \{ \| \s* $ ) /xgc ) ) {
185	0					0	$cur_token .= $1;
186	0	0				0	if ( $2 =~ /\{/ ) {
187	0	0				0	if ( scalar( $name =~ /\G([^\}]*)\}/gc ) ) {
188	0					0	$cur_token .= "{$1}";
189							} else {
190	0					0	die "Unmatched brace in name '$name'";
191							}
192							} else {
193	0	0				0	if ( $cur_token =~ /^{(.*)}$/ ) {
194	0					0	$cur_token = $1;
195							}
196	0					0	push @parts, $cur_token;
197	0					0	$cur_token = '';
198							}
199							}
200	0					0	return @parts;
201							}
202
203							}
204
205
206							sub _get_single_author_from_tokens {
207	0			0		0	my (@tokens) = @_;
208	0	0				0	if (@tokens == 0) {
		0
		0
209	0					0	return (undef, undef, undef, undef);
210							} elsif (@tokens == 1) { # name without comma
211	0	0				0	if ( $tokens[0] =~ /(^\|\s)[[:lower:]]/) { # name has von part or has only lowercase names
212	0					0	my @name_parts = _split_name_parts $tokens[0];
213
214	0					0	my $first;
215	0		0			0	while (@name_parts && ucfirst($name_parts[0]) eq $name_parts[0] ) {
216	0	0				0	$first .= $first ? ' ' . shift @name_parts : shift @name_parts;
217							}
218
219	0					0	my $von;
220							# von part are lowercase words
221	0		0			0	while ( @name_parts && lc($name_parts[0]) eq $name_parts[0] ) {
222	0	0				0	$von .= $von ? ' ' . shift @name_parts : shift @name_parts;
223							}
224
225	0	0				0	if (@name_parts) {
226	0					0	return ($first, $von, join(" ", @name_parts), undef);
227							} else {
228	0					0	return (undef, undef, $tokens[0], undef);
229							}
230							} else {
231	0	0	0			0	if ( $tokens[0] !~ /\{/ && $tokens[0] =~ /^((.*)\s+)?\b(\S+)$/) {
232	0					0	return ($2, undef, $3, undef);
233							} else {
234	0					0	my @name_parts = _split_name_parts $tokens[0];
235	0					0	return ($name_parts[0], undef, $name_parts[1], undef);
236							}
237							}
238
239							} elsif (@tokens == 2) {
240	0					0	my @von_last_parts = _split_name_parts $tokens[0];
241	0					0	my $von;
242							# von part are lowercase words
243	0		0			0	while ( @von_last_parts && lc($von_last_parts[0]) eq $von_last_parts[0] ) {
244	0	0				0	$von .= $von ? ' ' . shift @von_last_parts : shift @von_last_parts;
245							}
246	0					0	return ($tokens[1], $von, join(" ", @von_last_parts), undef);
247							} else {
248	0					0	my @von_last_parts = _split_name_parts $tokens[0];
249	0					0	my $von;
250							# von part are lowercase words
251	0		0			0	while ( @von_last_parts && lc($von_last_parts[0]) eq $von_last_parts[0] ) {
252	0	0				0	$von .= $von ? ' ' . shift @von_last_parts : shift @von_last_parts;
253							}
254	0					0	return ($tokens[2], $von, join(" ", @von_last_parts), $tokens[1]);
255							}
256
257							}
258
259
260
261							# The goal is to return a name in form
262							# von Last, Jr, First
263							# where any of the parts except Last may be empty.
264							#
265							sub to_string {
266	46			46	1	2172	my $self = shift;
267
268	46					101	my $last = $self->last; # assume always present
269	46	50				94	my $first = $self->first ? (", " . $self->first) : ''; # ", first"
270	46	50				101	my $von = $self->von ? ($self->von . " ") : ''; # "von "
271	46	50				93	my $jr = $self->jr ? (", " . $self->jr ) : ''; # ", jr"
272							#
273	46					93	my $ret = "${von}${last}${jr}${first}";
274							#warn "returning name: $ret\n";
275	46					142	return $ret;
276
277							# original code, which introduced a spurious space with a von part.
278							# https://github.com/borisveytsman/crossrefware/issues/11
279							#
280							# if ($self->jr) {
281							# return () . " " . $self->last . ", " . $self->jr . ", " . $self->first;
282							# } else {
283							# return ($self->von ? $self->von . " " : '') . $self->last . ($self->first ? ", " . $self->first : '');
284							# }
285							#
286							}
287
288
289							# Return 1 if the first letter on brace level 0 is lowercase
290							sub _is_von_token {
291	170			170		390	my $string = shift;
292	170					555	while ($string =~
293							s/^(\\[[:alpha:]]+\{\|\{\|\\[[:^alpha:]]?\|[[:^alpha:]])//) {
294	8	100				38	if ($1 eq '{' ) {
295	5					11	my $numbraces=1;
296	5		66			22	while ($numbraces !=0 && length($string)) {
297	66					111	my $symbol = substr($string, 0, 1);
298	66	50				138	if ($symbol eq '{') {
		100
299	0					0	$numbraces ++;
300							} elsif ($symbol eq '}') {
301	5					15	$numbraces --;
302							}
303	66					184	$string = substr($string,1);
304							}
305							}
306							}
307
308	170	100				329	if (length $string ) {
309	165					331	my $symbol = substr($string, 0, 1);
310	165	100				331	if (lc($symbol) eq $symbol) {
311	25					78	return 1;
312							} else {
313	140					424	return 0;
314							}
315							} else {
316	5					11	return 1;
317							}
318
319							}
320
321							1; # End of BibTeX::Entry
322
323							__END__