|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Biblio::Citation::Parser::Standard;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ######################################################################  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Biblio::Citation::Parser::Standard;  | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ######################################################################  | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  This file is part of ParaCite Tools (http://paracite.eprints.org/developers/)  | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  Copyright (c) 2004 University of Southampton, UK. SO17 1BJ.  | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  ParaTools is free software; you can redistribute it and/or modify  | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  it under the terms of the GNU General Public License as published by  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  the Free Software Foundation; either version 2 of the License, or  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  (at your option) any later version.  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  ParaTools is distributed in the hope that it will be useful,  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  but WITHOUT ANY WARRANTY; without even the implied warranty of  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  GNU General Public License for more details.  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  You should have received a copy of the GNU General Public License  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  along with ParaTools; if not, write to the Free Software  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ######################################################################  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 require Exporter;  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 @ISA = ("Exporter", "Biblio::Citation::Parser");  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
32
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
6352
 | 
 use strict;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
95
 | 
    | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
34
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
1684
 | 
 use Biblio::Citation::Parser::Templates;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4395
 | 
    | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our @EXPORT_OK = ( 'parse', 'new' );  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =pod  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B - citation parsing functionality  | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   use Biblio::Citation::Parser::Standard;  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # Parse a simple reference  | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $parser = new Biblio::Citation::Parser::Standard;  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $metadata = $parser->parse("M. Jewell (2004) Citation Parsing for Beginners. Journal of Madeup References 4(3).");  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   print "The title of this article is ".$metadata->{atitle}."\n";  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Biblio::Citation::Parser::Standard uses a relatively simple template matching  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 technique to extract metadata from citations.  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The Templates.pm module currently provides almost 400 templates, with  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 more being added regularly, and the parser returns the metadata in a  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 form that is easily massaged into OpenURLs (see the Biblio::OpenURL  | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 module for an even easier way).  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my %factors =  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_AUFIRST_"	=> 0.6,  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_AULAST_"	=> 0.6,  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_ISSN_"	=> 0.95,   | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_AUTHORS_"	=> 0.65,  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_EDITOR_"	=> 0.6,  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_DATE_"	=> 0.95,  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_YEAR_" 	=> 0.8,  | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_SUBTITLE_" 	=> 0.6,  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_TITLE_" 	=> 0.6,  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_UCTITLE_" 	=> 0.7,  | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_CAPTITLE_"	=> 0.7,  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_PUBLICATION_" => 0.65,  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_PUBLISHER_" 	=> 0.65,  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_PUBLOC_" 	=> 0.65,  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_UCPUBLICATION_" => 0.74,  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_CAPPUBLICATION_"	=> 0.7,   | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_CHAPTER_" 	=> 0.8,  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_VOLUME_" 	=> 0.8,  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_ISSUE_" 	=> 0.8,  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_PAGES_" 	=> 0.9,  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_ANY_" 	=> 0.05,  | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_ISBN_"	=> 0.95,  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_ISSN_"	=> 0.95,  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_SPAGE_"	=> 0.8,  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_EPAGE_"	=> 0.8,  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	"_URL_"		=> 0.9,  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 );  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =pod  | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 METHODS  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item $parser = Biblio::Citation::Parser::Standard-Enew()  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The new() method creates a new parser.   | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new  | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
109
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
124
 | 
 	my($class) = @_;  | 
| 
110
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my $self = {};  | 
| 
111
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	return bless($self, $class);  | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =pod  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item $reliability = Biblio::Citation::Parser::Standard::get_reliability($template)  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The get_reliability method returns a value that acts as an indicator  | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of the likelihood of a template matching correctly. Fields such as  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 page ranges, URLs, etc, have high likelihoods (as they follow rigorous  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 patterns), whereas titles, publications, etc have lower likelihoods.  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The method takes a template as a parameter, but you shouldn't really  | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 need to use this method much.  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub get_reliability  | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
130
 | 
8
 | 
 
 | 
 
 | 
  
8
  
 | 
  
1
  
 | 
19
 | 
 	my( $template ) = @_;  | 
| 
131
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
 	my $reliability = 0;  | 
| 
132
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
77
 | 
 	foreach(keys %factors)  | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
134
 | 
200
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1786
 | 
 		if ($template =~ /$_/)  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
136
 | 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
274
 | 
 			while($template =~ /$_/)  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
138
 | 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
72
 | 
 				$reliability += $factors{$_};  | 
| 
139
 | 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
363
 | 
 				$template =~ s/$_//;	  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
143
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
 	return $reliability;  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =pod  | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item $concreteness = Biblio::Citation::Parser::Standard::get_concreteness($template)  | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 As with the get_reliability() method, get_concreteness() takes  | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a template as a parameter, and returns a numeric indicator. In  | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 this case, it is the number of non-field characters in the template.  | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The more 'concrete' a template, the higher the probability that  | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 it will match well. For example, '_PUBLICATION_ Vol. _VOLUME_' is  | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a better match than '_PUBLICATION_ _VOLUME_', as _PUBLICATION_ is  | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 likely to subsume 'Vol.' in the second case.  | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub get_concreteness  | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
162
 | 
8
 | 
 
 | 
 
 | 
  
8
  
 | 
  
1
  
 | 
20
 | 
 	my( $template ) = @_;  | 
| 
163
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 	my $concreteness = 0;  | 
| 
164
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
 	foreach(keys %factors)  | 
| 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
166
 | 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1541
 | 
 		$template =~ s/$_//g;  | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}	  | 
| 
168
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
 	return length($template);  | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =pod  | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item $string = Biblio::Citation::Parser::Standard::strip_spaces(@strings)  | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is a helper function to remove spaces from all elements  | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of an array.  | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub strip_spaces  | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {	  | 
| 
182
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
3
 | 
 	my(@bits) = @_;  | 
| 
183
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	foreach(@bits) { s/^[[:space:]]*(.+)[[:space:]]*$/$1/;}  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
    | 
| 
184
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	return @bits;  | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =pod  | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item $templates = Biblio::Citation::Parser::Standard::get_templates()  | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns the current template list from the Biblio::Citation::Parser::Templates  | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 module. Useful for giving status lists.  | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub get_templates  | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
198
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
 	return $Biblio::Citation::Parser::Templates::templates;  | 
| 
199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =pod  | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item @authors = Biblio::Citation::Parser::Standard::handle_authors($string)  | 
| 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This (rather large) function handles the author fields of a reference.  | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 It is not all-inclusive yet, but it is usably accurate. It can handle  | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 author lists that are separated by semicolons, commas, and a few other  | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 delimiters, as well as &, and, and 'et al'.  | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The method takes an author string as a parameter, and returns an array  | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of extracted information in the format '{family => $family, given =>  | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $given}'.  | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut   | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub handle_authors  | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
218
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
4
 | 
 	my($authstr) = @_;  | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
220
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my @authsout = ();  | 
| 
221
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
59
 | 
 	$authstr =~ s/\bet al\b//;  | 
| 
222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Handle semicolon lists  | 
| 
223
 | 
1
 | 
  
 50
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
20
 | 
 	if ($authstr =~ /;/)  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
225
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my @auths = split /[[:space:]]*;[[:space:]]*/, $authstr;  | 
| 
226
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		foreach(@auths)  | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
228
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			my @bits = split /[,[:space:]]+/;  | 
| 
229
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			@bits = strip_spaces(@bits);  | 
| 
230
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			push @authsout, {family => $bits[0], given => $bits[1]};  | 
| 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($authstr =~ /^[[:upper:]\.]+[[:space:]]+[[:alnum:]]/)  | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
235
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my @bits = split /[[:space:]]+/, $authstr;  | 
| 
236
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		@bits = strip_spaces(@bits);  | 
| 
237
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $fam = 0;  | 
| 
238
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my($family, $given);  | 
| 
239
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		foreach(@bits)  | 
| 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
241
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 			next if ($_ eq "and" || $_ eq "&" || /^[[:space:]]*$/);  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
242
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			s/,//g;  | 
| 
243
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			if ($fam)  | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
245
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$family = $_;  | 
| 
246
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				push @authsout, {family => $family, given => $given};  | 
| 
247
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$fam = 0;  | 
| 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			else  | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
251
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$given = $_;  | 
| 
252
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$fam = 1;  | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($authstr =~ /^.+[[:space:]]+[[:upper:]\.]+/)  | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# Foo AJ, Bar PJ  | 
| 
259
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 		my $fam = 1;  | 
| 
260
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 		my $family = "";  | 
| 
261
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 		my $given = "";  | 
| 
262
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 		my @bits = split /[[:space:]]+/, $authstr;  | 
| 
263
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 		@bits = strip_spaces(@bits);  | 
| 
264
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 		foreach(@bits)  | 
| 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
266
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 			s/[,;\.]//g;  | 
| 
267
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 			s/\bet al\b//g;  | 
| 
268
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 			s/\band\b//;  | 
| 
269
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 			s/\b&\b//;  | 
| 
270
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 			next if /^[[:space:]]*$/;  | 
| 
271
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 			if ($fam == 1)  | 
| 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
273
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 				$family = $_;  | 
| 
274
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 				$fam = 0;  | 
| 
275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			else  | 
| 
277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
278
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 				$given = $_;  | 
| 
279
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 				$fam = 1;  | 
| 
280
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 				push @authsout, {family => $family, given => $given};  | 
| 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				  | 
| 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}   | 
| 
285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($authstr =~ /^.+,[[:space:]]*.+/ || $authstr =~ /.+\band\b.+/)  | 
| 
286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
287
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $fam = 1;  | 
| 
288
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $family = "";  | 
| 
289
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $given = "";  | 
| 
290
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my @bits = split /[[:space:]]*,|\band\b|&[[:space:]]*/, $authstr;  | 
| 
291
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		@bits = strip_spaces(@bits);  | 
| 
292
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		foreach(@bits)  | 
| 
293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
294
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			next if /^[[:space:]]*$/;  | 
| 
295
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			if ($fam)  | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
297
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$family = $_;  | 
| 
298
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$fam = 0;	  | 
| 
299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			else  | 
| 
301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
302
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$given = $_;  | 
| 
303
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				push @authsout, {family => $family, given => $given};  | 
| 
304
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$fam = 1;  | 
| 
305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($authstr =~ /^[[:alpha:][:space:]]+$/)  | 
| 
309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
310
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$authstr =~ /^([[:alpha:]]+)[[:space:]]*([[:alpha:]]*)$/;  | 
| 
311
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $given = "";  | 
| 
312
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $family = "";  | 
| 
313
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 		if (defined $1 && defined $2)  | 
| 
314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
315
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$given = $1;  | 
| 
316
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$family = $2;  | 
| 
317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
318
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 		if (!defined $2 || $2 eq "")  | 
| 
319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
320
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$family = $1;  | 
| 
321
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$given = "";  | 
| 
322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
323
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		push @authsout, {family => $family, given => $given};  | 
| 
324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif( $authstr =~ /[[:word:]]+[[:space:]]+[[:word:]]?[[:space:]]*[[:word:]]+/)  | 
| 
326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
327
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my @bits = split /[[:space:]]+/, $authstr;  | 
| 
328
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $rest = $authstr;  | 
| 
329
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$rest =~ s/$bits[-1]//;  | 
| 
330
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		push @authsout, {family => $bits[-1], given => $rest};  | 
| 
331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else  | 
| 
333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
336
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	return @authsout;  | 
| 
337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =pod  | 
| 
340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item %metadata = $parser-Extract_metadata($reference)  | 
| 
342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is the key method in the Standard module, although it is not actually  | 
| 
344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 called directly by users (the 'parse' method provides a wrapper). It takes  | 
| 
345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a reference, and returns a hashtable representing extracted metadata.  | 
| 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 A regular expression map is present in this method to transform '_AUFIRST_',  | 
| 
348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '_ISSN_', etc, into expressions that should match them. The method then  | 
| 
349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 finds the template which best matches the reference, picking the result that  | 
| 
350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 has the highest concreteness and reliability (see above), and returns the  | 
| 
351
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 fields in the hashtable. It also creates the marked-up version, that is  | 
| 
352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 useful for further formatting.   | 
| 
353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut   | 
| 
355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub extract_metadata  | 
| 
357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
358
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
0
  
 | 
21
 | 
 	my($self, $ref) = @_;  | 
| 
359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Skip to the first Alpha char  | 
| 
360
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	if ($ref !~ /^[[:digit:]]-X\.]+$/) { $ref =~ s/^[^[:alpha:]]+//; }  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
361
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	$ref =~ s/[[:space:]\*]+$//;  | 
| 
362
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	$ref =~ s/[[:space:]]{2}[[:space:]]+/ /g;  | 
| 
363
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	$ref =~ s/^[[:space:]\*]*(.+)[[:space:]\*]*$/$1/;  | 
| 
364
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 	my %metaout = ();  | 
| 
365
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	$metaout{ref} = $ref;  | 
| 
366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
367
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	$metaout{id} = [];  | 
| 
368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Pull out doi addresses  | 
| 
369
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	if ($ref =~ s/doi:(.+)\b//)  | 
| 
370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
371
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		push @{$metaout{id}}, "doi:$1";  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}	  | 
| 
373
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	if ($ref =~ s/((astro-ph|cond-mat|gr-qc|hep-ex|hep-lat|hep-ph|hep-th|math-th|nucl-ex|nucl-th|physics|quant-ph|math|nlin|cs)\/\d+\b)//)  | 
| 
374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
375
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		push @{$metaout{id}}, "arxiv:$1";  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
377
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my @specific_pubs =  | 
| 
378
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	(  | 
| 
379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# Put any specific publications in here  | 
| 
380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	);   | 
| 
381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
382
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
 	my $spec_pubs = "";  | 
| 
383
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	if (scalar @specific_pubs > 0)  | 
| 
384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
385
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
  		$spec_pubs = join("|", @specific_pubs);  | 
| 
386
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$spec_pubs = "|".$spec_pubs;  | 
| 
387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
389
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 	my $initial_match = "(?:\\b[[:alpha:]]\\.|\\b[[:alpha:]]\\b)";	  | 
| 
390
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 	my $name_match = "(?:(?:[[:alpha:],;&-]+)\\b)";  | 
| 
391
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 	my $conjs = "(?:\\s+und\\s+|\\s+band\\s+|\\s|,|&|;)";  | 
| 
392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
393
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
 	my %matches =  | 
| 
394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	(  | 
| 
395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_AUFIRST_"	=> "([[:alpha:]\.]+)",  | 
| 
396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_AULAST_"	=> "([[:alpha:]-]+)",  | 
| 
397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_ISSN_"	=> "([[:digit:]-]+)",  | 
| 
398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_AUTHORS_"	=> "((?:$initial_match|$name_match|$conjs)+?)",  | 
| 
399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_DATE_"	=> "([[:digit:]]{2}/[[:digit:]]{2}/[[:digit]]{2})",  | 
| 
400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_YEAR_" 	=> "([[:digit:]]{4})",  | 
| 
401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_TITLE_" 	=> "(.+?[a-zA-Z]+.+?)",  | 
| 
402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_SUBTITLE_" 	=> "(.+)",  | 
| 
403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_CHAPTER_"	=> "([[:digit:]]+)",  | 
| 
404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_UCTITLE_" 	=> "([^[:lower:]]+)",  | 
| 
405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_CAPTITLE_"	=> "([[:upper:]][^[:upper:]]+)",  | 
| 
406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_PUBLICATION_" => "([^0-9\(\);\"']{4,}$spec_pubs)",  | 
| 
407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_PUBLISHER_" => "(.+)",  | 
| 
408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_PUBLOC_" => "(.+)",  | 
| 
409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_EDITOR_" => "([[:alpha:]\\.,;\\s&-]+)",  | 
| 
410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_UCPUBLICATION_" => "([^[:lower:]]+)",  | 
| 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_CAPPUBLICATION_"	=> "([[:upper:]][^[:upper:]]+)",  | 
| 
412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_VOLUME_" 	=> "([[:digit:]]+)",  | 
| 
413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_ISSUE_" 	=> "([[:digit:]]+)",  | 
| 
414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_PAGES_" 	=> "([[:digit:]]+-{1,2}[[:digit:]]+?)",  | 
| 
415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_ANY_" 	=> "(.+?)",  | 
| 
416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_ISBN_"	=> "([[:digit:]X-]+)",		  | 
| 
417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_ISSN_"	=> "([[:digit:]X-]+)",		  | 
| 
418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_SPAGE_"	=> "([[:digit:]]+)",  | 
| 
419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_EPAGE_"	=> "([[:digit:]]+)",  | 
| 
420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"_URL_"		=> "(((http(s?):\\/\\/(www\\.)?)|(\\bwww\\.)|(ftp:\\/\\/(ftp\\.)?))([-\\w\\.:\\/\\s]+)(\\/|\\.\\S+|#\\w+))",  | 
| 
421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	);  | 
| 
422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
424
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my(@newtemplates) = ();  | 
| 
425
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	foreach my $template (@$Biblio::Citation::Parser::Templates::templates)  | 
| 
426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
427
 | 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
490
 | 
 		$_ = $template;  | 
| 
428
 | 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
462
 | 
 		s/\\/\\\\/g;  | 
| 
429
 | 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1511
 | 
 		s/\(/\\\(/g;  | 
| 
430
 | 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
906
 | 
 		s/\)/\\\)/g;  | 
| 
431
 | 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
437
 | 
 		s/\[/\\\[/g;  | 
| 
432
 | 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
381
 | 
 		s/\]/\\\]/g;  | 
| 
433
 | 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
962
 | 
 		s/\./\\\./g;  | 
| 
434
 | 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1466
 | 
 		s/ /\[\[:space:\]\]+/g;  | 
| 
435
 | 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
490
 | 
 		s/\?/\\\?/g;  | 
| 
436
 | 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1306
 | 
 		foreach my $key (keys %matches)  | 
| 
437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
438
 | 
9625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
67032
 | 
 			s/$key/$matches{$key}/g;  | 
| 
439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
440
 | 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1029
 | 
 		$_ .= "[.]?";  | 
| 
441
 | 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
686
 | 
 		push @newtemplates,$_;  | 
| 
442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
443
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my $index = 0;	  | 
| 
444
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	my @vars = ();  | 
| 
445
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my @matchedvars = ();  | 
| 
446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
447
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 	my $curr_conc = 0;  | 
| 
448
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 	my $curr_rel = 0;  | 
| 
449
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my $max_conc = 0;  | 
| 
450
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 	my $max_rel = 0;  | 
| 
451
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my $best_match = "";  | 
| 
452
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 	my $best_orig = "";  | 
| 
453
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	foreach my $currtemplate (@newtemplates)  | 
| 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
455
 | 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
865
 | 
 		my $original = $Biblio::Citation::Parser::Templates::templates->[$index];  | 
| 
456
 | 
385
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
184868
 | 
 		if ($ref =~ /^$currtemplate$/)  | 
| 
457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
458
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
 			$curr_rel = get_reliability($original);  | 
| 
459
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
 			$curr_conc = get_concreteness($original);  | 
| 
460
 | 
8
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
54
 | 
 			if ($curr_rel > $max_rel)  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
461
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
462
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 				$best_match = $currtemplate;  | 
| 
463
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 				$best_orig = $original;  | 
| 
464
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 				$max_conc = $curr_conc;  | 
| 
465
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 				$max_rel = $curr_rel;  | 
| 
466
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
467
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			elsif ($curr_rel == $max_rel && $curr_conc > $max_conc)  | 
| 
468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
469
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$best_match = $currtemplate;  | 
| 
470
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$best_orig = $original;  | 
| 
471
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$max_conc = $curr_conc;  | 
| 
472
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$max_rel = $curr_rel;  | 
| 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
474
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
475
 | 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37215
 | 
 		$index++;  | 
| 
476
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
478
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	$metaout{match} = $best_orig;  | 
| 
479
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
 	@vars = ($best_orig =~ /_([A-Z]+)_/g);  | 
| 
480
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
298
 | 
 	@matchedvars = ($ref =~ /^$best_match$/);  | 
| 
481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
482
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
74
 | 
 	$index = 0;  | 
| 
483
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	if (scalar @matchedvars > 0)  | 
| 
484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
485
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 		foreach(@vars)  | 
| 
486
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
487
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
 			$matchedvars[$index] =~ s/^\s*(.+)\s*$/$1/;  | 
| 
488
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
 			$metaout{lc $_} = $matchedvars[$index];  | 
| 
489
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
 			$index++;  | 
| 
490
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
492
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	foreach(keys %metaout)  | 
| 
493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
494
 | 
9
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
20
 | 
 		if (/^uc/)  | 
| 
495
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
496
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			my $alt = $_;  | 
| 
497
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$alt =~ s/^uc//;  | 
| 
498
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 			if (!defined $metaout{$alt} || $metaout{$alt} eq "")  | 
| 
499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
500
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$metaout{$alt} = $metaout{$_};  | 
| 
501
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
502
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
503
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
504
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Create a marked-up version   | 
| 
506
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my $in_ref = $ref;  | 
| 
507
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my $in_tmp = $best_orig;  | 
| 
508
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 	my $in_tmp2 = $best_orig;  | 
| 
509
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	foreach(keys %metaout)  | 
| 
510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
511
 | 
9
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
69
 | 
 		next if (!defined $metaout{$_} || $metaout{$_} eq "" || $_ eq "any");  | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
512
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
 		my $toreplace = "_".(uc $_)."_";  | 
| 
513
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
97
 | 
 		$in_tmp =~ s/$toreplace/<$_>$metaout{$_}<\/$_>/g;  | 
| 
514
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
92
 | 
 		$in_tmp2 =~ s/$toreplace/$metaout{$_}/g;  | 
| 
515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
516
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
517
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Fix any _ANY_s  | 
| 
518
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	$in_tmp2 =~ s/\\/\\\\/g;  | 
| 
519
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	$in_tmp2 =~ s/\(/\\\(/g;  | 
| 
520
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	$in_tmp2 =~ s/\)/\\\)/g;  | 
| 
521
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	$in_tmp2 =~ s/\[/\\\[/g;  | 
| 
522
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	$in_tmp2 =~ s/\]/\\\]/g;  | 
| 
523
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	$in_tmp2 =~ s/\./\\\./g;  | 
| 
524
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	$in_tmp2 =~ s/ /\[\[:space:\]\]+/g;  | 
| 
525
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	$in_tmp2 =~ s/\?/\\\?/g;  | 
| 
526
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	$in_tmp2 =~ s/_ANY_/(.+)/g;  | 
| 
527
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
 	my(@anys) = ($in_ref =~ /$in_tmp2/g);  | 
| 
528
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
529
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	foreach(@anys)  | 
| 
530
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
531
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 		$in_tmp =~ s/_ANY_/$_<\/any>/;  | 
| 
532
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
533
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	$metaout{marked} = $in_tmp;  | 
| 
534
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Map to OpenURL  | 
| 
535
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	if (defined $metaout{authors})  | 
| 
536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
537
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 		$metaout{authors} = [handle_authors($metaout{authors})];  | 
| 
538
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 		$metaout{aulast} = $metaout{authors}[0]->{family};  | 
| 
539
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 		$metaout{aufirst} = $metaout{authors}[0]->{given};  | 
| 
540
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
541
 | 
1
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
42
 | 
 	if (defined $metaout{publisher} && !defined $metaout{publication})  | 
| 
542
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
543
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$metaout{genre} = "book";  | 
| 
544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
545
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	$metaout{atitle} = $metaout{title};	  | 
| 
546
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 	$metaout{title} = $metaout{publication};  | 
| 
547
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	if (defined $metaout{cappublication}) { $metaout{title} = $metaout{cappublication} };  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
548
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	$metaout{date} = $metaout{year};  | 
| 
549
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
107
 | 
 	return %metaout;  | 
| 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
551
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
553
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =pod  | 
| 
554
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
555
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item $metadata = $parser-Eparse($reference);  | 
| 
556
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
557
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This method provides a wrapper to the extract_metadata  | 
| 
558
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 function. Simply pass a reference string, and a metadata  | 
| 
559
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 hash is returned.  | 
| 
560
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
561
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
562
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub parse  | 
| 
564
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
565
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
7
 | 
 	my($self, $ref) = @_;  | 
| 
566
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	my $hashout = {$self->extract_metadata($ref)};  | 
| 
567
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	return $hashout;  | 
| 
568
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
569
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
570
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
571
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
572
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  |