File Coverage

blib/lib/BibTeX/Parser.pm

Criterion	Covered	Total	%
statement	148	185	80.0
branch	58	78	74.3
condition	18	36	50.0
subroutine	14	17	82.3
pod	7	10	70.0
total	245	326	75.1

line	stmt	bran	cond	sub	pod	time	code
1							package BibTeX::Parser;
2							{
3							$BibTeX::Parser::VERSION = '1.95';
4							}
5							# ABSTRACT: A pure perl BibTeX parser
6	20			20		1584656	use warnings;
	20					39
	20					919
7	20			20		116	use strict;
	20					29
	20					48073
8
9							require BibTeX::Parser::Entry; # mutual dependency, so use instead of require
10
11
12							my $re_namechar = qr/[a-zA-Z0-9\!\$\&\*\+\-\.\/\:\;\<\>\?\[\]\^\_\`\\|\']/o;
13							my $re_name = qr/$re_namechar+/o;
14
15
16							sub new {
17	16			16	1	1599854	my ( $class, $fh, $opts ) = @_;
18
19	16		50			369	return bless {
20							fh => $fh,
21							opts => $opts \|\| {},
22							strings => {
23							jan => "January",
24							feb => "February",
25							mar => "March",
26							apr => "April",
27							may => "May",
28							jun => "June",
29							jul => "July",
30							aug => "August",
31							sep => "September",
32							oct => "October",
33							nov => "November",
34							dec => "December",
35							"j-tugboat" => "TUGboat", # missing definition is irritating
36							},
37							line => -1,
38							buffer => "",
39							entries => {}
40							}, $class;
41							}
42
43							sub read {
44	1			1	1	10	my $self = shift;
45	1	50				6	if (!exists $self->{opts}->{errorlevel}) {
46	1					3	$self->{opts}->{errorlevel} = 'warn';
47							}
48	1					5	while (my $entry=$self->next) {
49	10	100				24	if ($entry->parse_ok) {
50	8					18	$self->{entries}->{$entry->key()} = $entry;
51							} else {
52	2	50				6	if ($self->{opts}->{errorlevel} eq 'warn') {
		0
53	2					87	warn "BibTeX Parser: Skipping entry in line $."
54							} elsif ($self->{opts}->{errorlevel} eq 'error') {
55	0					0	exit("BibTeX Parser: Error in line $.")
56							}
57							}
58							}
59							}
60
61							sub n {
62	1			1	1	20	my $self = shift;
63	1					2	return(scalar keys %{$self->{entries}});
	1					13
64							}
65
66							sub entrykeys {
67	1			1	1	2	my $self = shift;
68	1					3	my @result = keys %{$self->{entries}};
	1					5
69	1					11	return(\@result);
70							}
71
72							sub has {
73	2			2	1	5	my $self = shift;
74	2					3	my $key = shift;
75	2					10	return(exists($self->{entries}->{$key}));
76							}
77
78							sub entry {
79	1			1	1	2	my $self = shift;
80	1					14	my $key = shift;
81	1					6	return($self->{entries}->{$key});
82							}
83
84							sub _slurp_close_bracket;
85
86							sub _parse_next {
87	53			53		69	my $self = shift;
88
89	53					66	while (1) { # loop until regular entry is finished
90	57	100				307	return 0 if $self->{fh}->eof;
91	51					827	local $_ = $self->{buffer};
92
93	51					154	until (/@/m) {
94	136					459	my $line = $self->{fh}->getline;
95	136	100				292	return 0 unless defined $line;
96	132					173	$line =~ s/^%.*$//;
97	132					331	$_ .= $line;
98							}
99
100	47					205	my $current_entry = new BibTeX::Parser::Entry;
101	47	100				744	if (/@($re_name)/cgo) {
102	45					130	my $type = uc $1;
103	45					147	$current_entry->type( $type );
104	45					98	my $start_pos = pos($_) - length($type) - 1;
105
106							# read rest of entry (matches braces)
107	45					55	my $bracelevel = 0;
108	45					97	$bracelevel += tr/\{/\{/; #count braces
109	45					75	$bracelevel -= tr/\}/\}/;
110	45					106	while ( $bracelevel != 0 ) {
111	186					214	my $position = pos($_);
112	186					403	my $line = $self->{fh}->getline;
113	186	50				634	last unless defined $line;
114	186					261	$bracelevel =
115							$bracelevel + ( $line =~ tr/\{/\{/ ) - ( $line =~ tr/\}/\}/ );
116	186					395	$_ .= $line;
117	186					424	pos($_) = $position;
118							}
119
120							# Remember text before the entry
121	45					103	my $pre = substr($_, 0, $start_pos-1);
122	45	100				104	if ($start_pos == 0) {
123	10					15	$pre = '';
124							}
125	45					204	$current_entry->pre($pre);
126
127
128							# Remember raw bibtex code
129	45					84	my $raw = substr($_, $start_pos);
130	45					115	$raw =~ s/^\s+//;
131	45					339	$raw =~ s/\s+$//;
132	45					151	$current_entry->raw_bibtex($raw);
133
134	45					57	my $pos = pos $_;
135	45					89	tr/\n/ /;
136	45					84	pos($_) = $pos;
137
138	45	100	100			268	if ( $type eq "STRING" ) {
		100
139	2	50				95	if (/\G\{\s($re_name)\s=\s*/cgo) {
140	2					5	my $key = lc($1);
141							my $value = _parse_string( $self->{strings},
142	2					6	exists $self->{opts}->{"no-warn-ack"} );
143							# If redefining to the same value, don't worry.
144							# If redefining j-tugboat (predefined above), don't worry,
145							# people use \TUB vs. "TUGboat" arbitrarily.
146	2					3	my $old_value = $self->{strings}->{$key};
147	2	50	33			9	if ($key ne "j-tugboat"
			33
148							&& defined $old_value && $old_value ne $value) {
149	0					0	warn("Redefining string $key ",
150							"(oldvalue=$old_value, newvalue=$value");
151							}
152	2					4	$self->{strings}->{$key} = $value;
153	2					5	/\G[\s\n]*\}/cg;
154							} else {
155	0					0	$current_entry->error("Malformed string! ($raw)");
156	0					0	return $current_entry;
157							}
158							} elsif ( $type eq "COMMENT" or $type eq "PREAMBLE" ) {
159	2					5	/\G\{./cgo;
160	2					3	_slurp_close_bracket;
161							} else { # normal entry
162	41					128	$current_entry->parse_ok(1);
163
164							# parse key
165	41	100				1340	if (/\G\s\{(?:\s($re_name)\s,[\s\n]\|\s+\r?\s*)/cgo) {
166	39					150	$current_entry->key($1);
167
168							# fields
169	39					986	while (/\G[\s\n]($re_name)[\s\n]=[\s\n]*/cgo) {
170							$current_entry->field(
171							$1 => _parse_string( $self->{strings},
172	171					390	exists $self->{opts}->{"no-warn-ack"} ) );
173	171					273	my $idx = index( $_, ',', pos($_) );
174	171	100				777	pos($_) = $idx + 1 if $idx > 0;
175							}
176
177	39					227	return $current_entry;
178
179							} else {
180
181	2		50			55	$current_entry->error("Malformed entry (key contains invalid characters) at " . substr($_, pos($_) \|\| 0, 20) . ", ignoring");
182	2					7	_slurp_close_bracket;
183	2					16	return $current_entry;
184							}
185							}
186
187	4					21	$self->{buffer} = substr $_, pos($_);
188
189							} else {
190	2		50			23	$current_entry->error("Did not find type at " . substr($_, pos($_) \|\| 0, 20));
191	2					15	return $current_entry;
192							}
193
194							}
195							}
196
197
198							sub next {
199	53			53	1	5252	my $self = shift;
200
201	53					137	return $self->_parse_next;
202							}
203
204							# slurp everything till the next closing brace. Handles
205							# nested brackets
206							sub _slurp_close_bracket {
207	4			4		8	my $bracelevel = 0;
208							BRACE: {
209	4	50				5	/\G[^\}]*\{/cg && do { $bracelevel++; redo BRACE };
	4					22
	0					0
	0					0
210							/\G[^\{]*\}/cg
211	4	100				15	&& do {
212	2	50				4	if ( $bracelevel > 0 ) {
213	0					0	$bracelevel--;
214	0					0	redo BRACE;
215							} else {
216	2					2	return;
217							}
218							}
219							}
220							}
221
222							# parse bibtex string in $_ and return. A BibTeX string is either enclosed
223							# in double quotes '""' or matching braces '{}'. The braced form may contain
224							# nested braces.
225							#
226							# Second argument NO_WARN_ACK says whether to emit the warning
227							# "Using undefined string" if the name of the undefined string starts
228							# with "ack-". The default is to warn. The TUGboat config file
229							# ltx2crossrefxml-tugboat.cfg sets this.
230							#
231							# It is an unfortunate fact that people routinely copy bib entries
232							# without copying the "ack-nhfb" or other "ack-..." @string definitions
233							# in Nelson Beebe's bibliography files, resulting in this warning. It is
234							# too irritating to have to define them (they are never used), and also
235							# too irritating to have to see the many warnings on every run.
236							#
237							# Similarly for j-TUGboat, which we predefine in the new() fn above.
238							#
239							sub _parse_string {
240	180			180		133731	my ($strings_ref, $no_warn_ack) = @_;
241	180		50			635	$no_warn_ack \|\|= 0;
242
243	180					218	my $value = "";
244
245							PART: {
246	180	100				198	if (/\G(\d+)/cg) {
	187	100				1267
		100
247	12					72	$value .= $1;
248							} elsif (/\G($re_name)/cgo) {
249	10					22	my $key = lc($1);
250	10	50				45	if (! defined $strings_ref->{lc($1)}) {
251							#debug_hash("looking up $key for $1", $strings_ref);
252	0	0	0			0	warn("Using undefined string $1 (", lc($1), ") in: $_")
253							unless $no_warn_ack && $1 =~ /^ack-/;
254							}
255	10		50			30	$value .= $strings_ref->{$1} \|\| "";
256							} elsif (/\G"(([^"\\](\\.)[^\\"]))"/cgs)
257							{ # quoted string with embedded escapes
258	117					300	$value .= $1;
259							} else {
260	48					103	my $part = _extract_bracketed( $_ );
261	48					90	$value .= substr $part, 1, length($part) - 2; # strip quotes
262							}
263
264	187	100				389	if (/\G\s#\s/cg) { # string concatenation by #
265	7					16	redo PART;
266							}
267							}
268	180					538	$value =~ s/[\s\n]+/ /g;
269	180					569	return $value;
270							}
271
272							sub _extract_bracketed
273							{
274	48			48		86	for($_[0]) # alias to $_
275							{
276	48					70	/\G\s+/cg;
277	48					58	my $start = pos($_);
278	48					77	my $depth = 0;
279	48					52	while(1)
280							{
281	175	100				258	/\G\\./cg && next;
282	173	100				297	/\G\{/cg && (++$depth, next);
283	118	100				247	/\G\}/cg && (--$depth > 0 ? next : last);
		100
284	63	50				129	/\G([^\\\{\}]+)/cg && next;
285	0					0	last; # end of string
286							}
287	48					127	return substr($_, $start, pos($_)-$start);
288							}
289							}
290
291							# Split the $string using $pattern as a delimiter with
292							# each part having balanced braces (so "{$pattern}"
293							# does NOT split).
294							# Return empty list if unmatched braces
295
296							sub _split_braced_string {
297	248			248		124536	my $string = shift;
298	248					324	my $pattern = shift;
299	248					344	my @tokens;
300	248	100				491	return () if $string eq '';
301	247					292	my $buffer;
302	247		100			583	while (!defined pos $string \|\| pos $string < length $string) {
303	422	100				11448	if ( $string =~ /\G(.*?)(\{\|$pattern)/cgi ) {
304	193					489	my $match = $1;
305	193	100				2825	if ( $2 =~ /$pattern/i ) {
		50
306	164					379	$buffer .= $match;
307	164					297	push @tokens, $buffer;
308	164					856	$buffer = "";
309							} elsif ( $2 =~ /\{/ ) {
310	29					68	$buffer .= $match . "{";
311	29					46	my $numbraces=1;
312	29		100			132	while ($numbraces !=0 && pos $string < length $string) {
313	345					491	my $symbol = substr($string, pos $string, 1);
314	345					414	$buffer .= $symbol;
315	345	50				633	if ($symbol eq '{') {
		100
316	0					0	$numbraces ++;
317							} elsif ($symbol eq '}') {
318	27					37	$numbraces --;
319							}
320	345					1136	pos($string) ++;
321							}
322	29	100				159	if ($numbraces != 0) {
323	2					13	return ();
324							}
325							} else {
326	0					0	$buffer .= $match;
327							}
328							} else {
329	229		100			916	$buffer .= substr $string, (pos $string \|\| 0);
330	229					354	last;
331							}
332							}
333	245	50				749	push @tokens, $buffer if $buffer;
334	245					1348	return @tokens;
335							}
336
337
338							#
339							sub debug_hash {
340	0			0	0		my ($label) = shift;
341	0	0	0				my (%hash) = (ref $_[0] && $_[0] =~ /.HASH./) ? %{$_[0]} : @_;
	0
342
343	0						my $str = "$label: {";
344	0						my @items = ();
345	0						for my $key (sort keys %hash) {
346	0						my $val = $hash{$key};
347	0						$key =~ s/\n/\\n/g;
348	0						$val =~ s/\n/\\n/g;
349	0						push (@items, "$key:$val");
350							}
351	0						$str .= join (",", @items);
352	0						$str .= "}";
353
354	0						warn ($str);
355							}
356
357							#
358							sub debug_list {
359	0			0	0		my ($label) = shift;
360	0	0	0				my (@list) = (ref $_[0] && $_[0] =~ /.ARRAY./) ? @{$_[0]} : @_;
	0
361
362	0						my $str = "$label [" . join (",", @list) . "]";
363	0						warn $str;
364							}
365
366
367							# Return string representation of call stack for debugging.
368							#
369							sub backtrace {
370	0			0	0		my $ret = "";
371
372	0						my ($line, $subr);
373	0						my $stackframe = 1; # skip ourselves
374	0						while ((undef,undef,$line,$subr) = caller ($stackframe)) {
375	0						$ret .= " -> $subr.$line";
376	0						$stackframe++;
377							}
378
379	0						return $ret;
380							}
381
382
383							1; # End of BibTeX::Parser
384
385
386							__END__