File Coverage

blib/lib/Bib/Tools.pm
Criterion Covered Total %
statement 191 543 35.1
branch 33 194 17.0
condition 11 60 18.3
subroutine 30 43 69.7
pod 19 19 100.0
total 284 859 33.0


line stmt bran cond sub pod time code
1             ############################################################
2             #
3             # Bib::Tools - For managing collections of Bib::CrossRef references.
4             #
5             ############################################################
6              
7             package Bib::Tools;
8              
9 1     1   70451 use 5.8.8;
  1         11  
10 1     1   5 use strict;
  1         2  
  1         31  
11 1     1   6 use warnings;
  1         2  
  1         42  
12 1     1   6 no warnings 'uninitialized';
  1         1  
  1         51  
13              
14             require Exporter;
15 1     1   559 use Bib::CrossRef;
  1         84803  
  1         57  
16 1     1   11 use LWP::UserAgent;
  1         1  
  1         23  
17 1     1   4 use JSON qw/decode_json/;
  1         3  
  1         7  
18 1     1   105 use URI::Escape qw(uri_escape_utf8 uri_unescape);
  1         2  
  1         56  
19 1     1   6 use HTML::Entities qw(decode_entities encode_entities);
  1         2  
  1         41  
20 1     1   651 use HTML::TreeBuilder::XPath;
  1         61411  
  1         9  
21 1     1   44 use XML::Simple qw(XMLin);
  1         13  
  1         9  
22 1     1   634 use BibTeX::Parser qw(new next);
  1         12050  
  1         35  
23 1     1   595 use IO::File;
  1         9379  
  1         113  
24 1     1   8 use vars qw($VERSION @EXPORT @EXPORT_OK %EXPORT_TAGS @ISA);
  1         2  
  1         6194  
25              
26             #use LWP::Protocol::https;
27             #use Data::Dumper;
28              
29             $VERSION = '0.17';
30             @ISA = qw(Exporter);
31             @EXPORT = qw();
32             @EXPORT_OK = qw(
33             sethtml clearhtml add_details add_google add_google_search add_orcid add_fromfile add_dblp add_pubmed
34             send_resp print print_nodoi num num_nodoi getref getref_nodoi append add_bibtex
35             );
36             %EXPORT_TAGS = (all => \@EXPORT_OK);
37              
38             ####################################################################################
39             sub new {
40 3     3 1 1756 my $self;
41             # defaults
42 3         9 $self->{refs} = []; # the references
43 3         7 $self->{nodoi_refs} = [];
44 3         6 $self->{duprefs} = [];
45 3         7 $self->{html}=0;
46 3         6 $self->{ratelimit}=5; # limit of 5 crossref queries per sec
47 3         7 $self->{last} = {};
48 3         6 bless $self;
49            
50 3         4 my $ratelimit = $_[1];
51 3 50 33     11 if (defined($ratelimit) && ($ratelimit>=0)) {$self->{ratelimit}=$ratelimit};
  0         0  
52 3         7 return $self;
53             }
54              
55             ####################################################################################
56             sub sethtml {
57 1     1 1 2 my $self = shift @_;
58 1         2 $self->{html}=1;
59             }
60              
61             ####################################################################################
62             sub clearhtml {
63 0     0 1 0 my $self = shift @_;
64 0         0 $self->{html}=0;
65             }
66              
67             ####################################################################################
68             sub _err {
69 0     0   0 my ($self, $str) = @_;
70 0 0       0 if ($self->{html}) {
71 0         0 print "

",$str,"

";
72             } else {
73 0         0 print $str,"\n";
74             }
75             }
76              
77             ####################################################################################
78             sub _split_duplicates {
79             # split list of references into three lists: one with refs that have unique doi's, one with no doi's
80             #and one with all the rest (with duplicate doi's)
81 3     3   10 my $self = shift @_;
82 3         7 my @refs=@{$self->{refs}};
  3         8  
83            
84 3         6 my @newrefs;
85 3         8 foreach my $ref (@refs) {
86 5         15 my $doi = $ref->doi();
87 5 100 66     40 if (!defined($doi) || length($doi)==0) {push @{$self->{nodoi_refs}}, $ref; next; }# skip entries with no DOI
  3         5  
  3         6  
  3         6  
88 2         4 my $found = 0;
89 2         4 foreach my $ref2 (@newrefs) {
90 1 50       4 if ($ref2->doi() eq $doi) {
91 1         8 $found = 1;
92             }
93             }
94 2 100       6 if (!$found) {
95 1         2 push @newrefs, $ref;
96             } else {
97 1         2 push @{$self->{duprefs}}, $ref;
  1         4  
98             }
99             }
100 3         21 $self->{refs} = \@newrefs;
101             }
102              
103             ####################################################################################
104             sub append {
105             # add new reference to end of existing list
106 5     5 1 1557 my $self = shift @_;
107 5         11 my $ref = shift @_;
108 5         7 push @{$self->{refs}}, $ref;
  5         29  
109             }
110              
111             ####################################################################################
112             sub add_details {
113             # given an array of raw strings, try to convert into paper references
114            
115 1     1 1 380 my $self = shift @_;
116 1         5 foreach my $cites (@_) {
117 2         16 $self->{last} = Bib::CrossRef->new();
118 2         27 $self->{last}->parse_text($cites);
119 2         15674 $self->append($self->{last});
120 2         199 sleep 1/(0.001+$self->{ratelimit}); # rate limit queries to crossref
121             }
122 1         9 $self->_split_duplicates();
123             }
124              
125             ####################################################################################
126             sub add_google {
127             # scrape paper details from google scholar personal page -- nb: no doi info on google, so use crossref.org to obtain this
128             # nb: doesn't work with google scholar search results
129            
130 0     0 1 0 my $self = shift @_;
131 0         0 my $url = shift @_;
132 0         0 my $ua = LWP::UserAgent->new;
133 0         0 $ua->agent('Mozilla/5.0');
134 0         0 my $req = HTTP::Request->new(GET => $url);
135 0         0 my $res = $ua->request($req);
136 0 0       0 if ($res->is_success) {
137 0         0 my $tree= HTML::TreeBuilder::XPath->new;
138 0         0 $tree->parse($res->decoded_content);
139 0         0 my @atitles=$tree->findvalues('//tr[@class="gsc_a_tr"]/td/a[@class="gsc_a_at"]');
140 0         0 my @authors=$tree->findvalues('//tr[@class="gsc_a_tr"]/td/div[@class="gs_gray"][1]');
141 0         0 my @jtitles=$tree->findvalues('//tr[@class="gsc_a_tr"]/td/div[@class="gs_gray"][2]');
142 0         0 my $len1 = @atitles; my $len2 = @authors; my $len3 = @jtitles;
  0         0  
  0         0  
143 0 0 0     0 if (($len1 != $len2) || ($len1 != $len3) || ($len2 != $len3)) {$self->_err("Problem parsing google page: mismatched $len1 titles/$len2 authors/$len3 journals.");return []}
  0   0     0  
  0         0  
144 0         0 for (my $i = 0; $i<$len1; $i++) {
145             # these are already utf8
146 0         0 $authors[$i] = decode_entities($authors[$i]);
147 0         0 $atitles[$i] = decode_entities($atitles[$i]);
148 0         0 $jtitles[$i] = decode_entities($jtitles[$i]);
149 0         0 my $temp = $authors[$i].", ".$atitles[$i].", ".$jtitles[$i];
150 0         0 my $r = Bib::CrossRef->new;
151 0         0 $r->parse_text($temp);
152 0         0 $jtitles[$i] =~ m/\s([0-9][0-9][0-9][0-9])$/;
153 0         0 my $year=$1;
154 0 0 0     0 if ((length($year)==4) && ($r->date ne $1)) {
155 0         0 $r->_setscore(0.5); # mismatch in year, probably bad
156             }
157 0         0 $self->append($r);
158             }
159             } else {
160 0         0 $self->_err("Problem with $url: ".$res->status_line);
161             }
162             }
163              
164             ####################################################################################
165             sub add_google_search {
166             # scrape paper details from google scholar search results -- *not* from persons scholar home page
167              
168 0     0 1 0 my $self = shift @_;
169 0         0 my $url = shift @_;
170 0         0 my $ua = LWP::UserAgent->new;
171 0         0 $ua->agent('Mozilla/5.0');
172 0         0 my $req = HTTP::Request->new(GET => $url);
173 0         0 my $res = $ua->request($req);
174 0 0       0 if ($res->is_success) {
175 0         0 my $tree= HTML::TreeBuilder::XPath->new;
176 0         0 $tree->parse($res->decoded_content);
177 0         0 my @atitles=$tree->findvalues('//div[@class="gs_ri"]/h3/a');
178 0         0 my @authors=$tree->findvalues('//div[@class="gs_a"]');
179 0         0 my $len1 = @atitles; my $len2 = @authors;
  0         0  
180 0 0       0 if ($len1 != $len2) {$self->_err("Problem parsing google page: mismatched $len1 titles/$len2 authors.");return [];}
  0         0  
  0         0  
181 0         0 my @cites=();
182 0         0 for (my $i = 0; $i<$len1; $i++) {
183 0         0 $authors[$i] = decode_entities($authors[$i]);
184 0         0 $atitles[$i] = decode_entities($atitles[$i]);
185 0         0 my $str = $authors[$i].", ".$atitles[$i];
186 0 0       0 if (length($str)>5) { # a potentially useful entry ?
187 0         0 push @cites, $authors[$i].", ".$atitles[$i];
188             }
189             }
190 0         0 $self->add_details(@cites);
191             } else {
192 0         0 $self->_err("Problem with $url: ".$res->status_line);
193             }
194             }
195              
196             ####################################################################################
197             sub _dblp_setauth {
198 0     0   0 my $self = shift @_; my $r = shift @_; my $cite = shift @_;
  0         0  
  0         0  
199            
200 0 0       0 if (ref($cite->{'author'}) eq "HASH") {
201 0         0 $r->_setauthcount(1);
202 0         0 $r->_setauth(1,$cite->{'author'});
203             } else {
204 0         0 my $count = 0;
205 0         0 foreach my $au (@{$cite->{'author'}}) {
  0         0  
206 0         0 $count++;
207 0         0 $r->_setauth($count, $au);
208             }
209 0         0 $r->_setauthcount($count);
210             }
211             }
212              
213             ####################################################################################
214             sub add_dblp {
215             # get details using DBLP XML API
216            
217 0     0 1 0 my $self = shift @_;
218 0         0 my $url = shift @_;
219 0 0       0 my $maxnum = shift @_; if (!defined($maxnum)) {$maxnum=-1;}
  0         0  
  0         0  
220            
221 0         0 my $ua = LWP::UserAgent->new;
222 0         0 $ua->agent('Mozilla/5.0');
223 0         0 my $req = HTTP::Request->new(GET => $url);
224 0         0 my $res = $ua->request($req);
225 0 0       0 if ($res->is_success) {
226 0         0 my $xs = XML::Simple->new();
227 0         0 my $data = $xs->XMLin($res->decoded_content);
228 0         0 my @cites; my @ctemp;
229 0 0       0 if (defined $data->{'r'}) {
    0          
230             # a person page
231 0         0 @cites = $data->{'r'};
232             } elsif (defined $data->{'article'}) {
233             # its xml for a single article
234 0         0 $ctemp[0] = $data;
235 0         0 push @cites, \@ctemp;
236             }
237 0         0 my $num=0;
238 0         0 foreach my $c (@{$cites[0]}) {
  0         0  
239 0 0 0     0 $num++; if ($maxnum>0 && $num>$maxnum) {last;} # mainly for testing
  0         0  
  0         0  
240 0         0 my @k = keys %{$c};
  0         0  
241 0         0 my $cite = $c->{$k[0]};
242 0         0 my $ee = $cite->{'ee'};
243 0 0       0 if ($ee =~ m/dx.doi.org/) {
244             # we have a DOI, lets call crossref
245 0         0 $ee =~ s/http:\/\/dx.doi.org\///;
246 0         0 my $r = Bib::CrossRef->new;
247 0         0 $r->parse_text($ee);
248 0 0       0 if ($r->score >=1) {
249 0 0 0     0 if (!defined $r->authcount || $r->authcount==0) {
250             # shouldn't happen, but sometimes doi data lacks authors so use dblp data
251 0         0 $self->_dblp_setauth($r,$cite);
252             }
253 0         0 $self->append($r);
254 0         0 next; # move on to next record
255             }
256             }
257 0         0 my $jtitle='';
258 0 0       0 if (defined $cite->{'journal'}) {
    0          
259 0         0 $jtitle = $cite->{'journal'};
260             } elsif (defined $cite->{'booktitle'}) {
261 0         0 $jtitle = $cite->{'booktitle'};
262             }
263 0         0 my $temp = $cite->{'year'}.' '.$cite->{'title'}.' '.$jtitle. ' ';
264 0 0       0 if (ref($cite->{'author'}) eq "HASH") {
265 0         0 $temp .= $cite->{'author'};
266             } else {
267 0         0 foreach my $au (@{$cite->{'author'}}) { $temp .= $au.", ";}
  0         0  
  0         0  
268             }
269 0         0 my $r = Bib::CrossRef->new;
270 0         0 $r->parse_text($temp);
271 0 0       0 if ($r->score >= 1) {
272             # found an ok match, lets use it
273 0         0 $self->append($r);
274 0         0 next; # move on
275             }
276            
277             # we got a poor match, lets use the rest of the dblp data
278 0         0 $r = Bib::CrossRef->new;
279 0 0       0 if (exists $cite->{'publtype'}) {
    0          
    0          
    0          
280 0         0 $r->_setgenre($cite->{'publtype'});
281             } elsif ($k[0] =~ m/article/) {
282 0         0 $r->_setgenre('article');
283             } elsif ($k[0] =~ m/inproceedings/) {
284 0         0 $r->_setgenre('proceeding');
285             } elsif ($k[0] =~ m/informal/) {
286 0         0 $r->_setgenre('preprint');
287             } else {
288 0         0 $r->_setgenre($k[0]);
289             }
290 0         0 $r->_setdate($cite->{'year'}); $r->_setatitle($cite->{'title'}); $r->_setjtitle($jtitle);
  0         0  
  0         0  
291 0         0 $self->_dblp_setauth($r,$cite);
292 0 0       0 if (defined $cite->{'volume'}) {$r->_setvolume($cite->{'volume'});}
  0         0  
293 0 0       0 if (defined $cite->{'number'}) {$r->_setissue($cite->{'number'});}
  0         0  
294 0 0       0 if (defined $cite->{'pages'}) {
295 0         0 my @bits = split('-',$cite->{'pages'});
296 0 0       0 if (defined $bits[0]) {$r->_setspage($bits[0]);}
  0         0  
297 0 0       0 if (defined $bits[1]) {$r->_setepage($bits[1]);}
  0         0  
298             }
299 0 0       0 if (($cite->{'ee'} =~ m/^http:\/\//)) {$r->_seturl($cite->{'ee'});}
  0         0  
300 0         0 $r->_setscore(1);
301 0         0 $r->_setquery($temp);
302             # add manually constructed record
303 0         0 $self->append($r);
304             }
305 0         0 $self->_split_duplicates();
306             } else {
307 0         0 $self->_err("Problem with $url: ".$res->status_line);
308             }
309             }
310              
311             ####################################################################################
312             sub _orcid_getdoi {
313             # extract DOI from an orcid entry
314 0     0   0 my $cite = shift @_;
315            
316 0         0 my $doi='';
317 0         0 my $c = $cite->{'work-external-identifiers'}->{'work-external-identifier'};
318 0 0       0 if (ref($c) eq "HASH") {
319             # a single value
320 0 0       0 if ($c->{'work-external-identifier-type'} =~ m/doi/) {
321             # and its a DOI
322 0         0 $doi = $c->{'work-external-identifier-id'};
323             }
324             } else {
325             # multiple values
326 0         0 foreach my $id (@{$c}) {
  0         0  
327 0 0       0 if ($id->{'work-external-identifier-type'} =~ m/doi/) {
328             # its a DOI
329 0         0 $doi = $id->{'work-external-identifier-id'};
330 0         0 last; # exit loop
331             }
332             }
333             }
334 0         0 return $doi;
335             }
336              
337             ####################################################################################
338             sub _orcid_getauth {
339             # generate an author string from an orcid entry
340 0     0   0 my $cite = shift @_;
341              
342 0         0 my $auth='';
343 0         0 my $c = $cite->{'work-contributors'}->{'contributor'};
344 0 0       0 if (ref($c) eq "HASH") {
345             # single author
346 0 0       0 if ($c->{'contributor-attributes'}->{'contributor-role'} =~ m/author/) {$auth = $c->{'credit-name'}->{'content'};}
  0         0  
347             } else {
348             # multiple authors
349 0         0 foreach my $au (@{$c}) {
  0         0  
350 0 0       0 if ($au->{'contributor-attributes'}->{'contributor-role'} =~ m/author/) {$auth .= $au->{'credit-name'}->{'content'}.", ";}
  0         0  
351             }
352             }
353 0         0 return $auth;
354             }
355              
356             ####################################################################################
357             sub _orcid_setauth {
358             # use an orcid entry to set citation author list (using orcid bibtex data if appropriate)
359 0     0   0 my $self = shift @_; my $r = shift @_;
  0         0  
360 0         0 my $cite = shift @_; my $entry = shift @_;
  0         0  
361            
362 0         0 my $c = $cite->{'work-contributors'}->{'contributor'};
363 0 0       0 if (defined $c) {
364             # we have an orcid author entry
365 0         0 my $authcount=0;
366 0 0       0 if (ref($c) eq "HASH") {
367             # single author
368 0 0       0 if ($c->{'contributor-attributes'}->{'contributor-role'} =~ m/author/) {
369 0         0 $authcount++;
370 0         0 $r->_setauth($authcount,$c->{'credit-name'}->{'content'});
371             }
372             } else {
373             # multiple authors
374 0         0 foreach my $au (@{$c}) {
  0         0  
375 0 0       0 if ($au->{'contributor-attributes'}->{'contributor-role'} =~ m/author/) {
376 0         0 $authcount++;
377 0         0 $r->_setauth($authcount,$au->{'credit-name'}->{'content'});
378             }
379             }
380             }
381 0         0 $r->_setauthcount($authcount);
382 0 0       0 if ($authcount>0) {return;} # found some authors, finish up
  0         0  
383             }
384             # no author info, lets see if bibtex has any author info
385 0 0       0 if (defined $entry) {
386 0         0 $self->_bibtex_setauth($r,$entry);
387             }
388             }
389              
390             ####################################################################################
391             sub add_orcid {
392             # get paper details from orcid using API
393            
394 0     0 1 0 my $self = shift @_; my $orcid_id = shift @_;
  0         0  
395            
396 0         0 my $ua = LWP::UserAgent->new;
397 0         0 my $req = HTTP::Request->new(GET => "http://pub.orcid.org/$orcid_id/orcid-works/");
398 0         0 my $res = $ua->request($req);
399 0 0       0 if ($res->is_success) {
400 0         0 my $xs = XML::Simple->new();
401             # the orcid response is utf8 xml
402 0         0 my $data = $xs->XMLin($res->decoded_content);
403 0         0 my @cites = $data->{'orcid-profile'}->{'orcid-activities'}->{'orcid-works'}->{'orcid-work'};
404 0         0 foreach my $cite (@{$cites[0]}) {
  0         0  
405 0         0 my $entry = undef;
406 0 0       0 if ($cite->{'work-citation'}->{'work-citation-type'} =~ m/bibtex/) {
407             # we have a bibtex reference, extract some extra info
408 0         0 my $bibtex = $cite->{'work-citation'}->{'citation'};
409 0         0 open my $fh, '<', \$bibtex;
410 0         0 my $parser = BibTeX::Parser->new($fh);
411 0         0 $entry = $parser->next;
412 0 0       0 if (!$entry->parse_ok) {$entry = undef;}
  0         0  
413             }
414 0         0 my $doi = _orcid_getdoi($cite);
415 0 0 0     0 if ((defined $doi) && (length($doi)>5)) { # we seem to have a DOI
416             # use DOI to search.crossref.org
417 0         0 my $r = Bib::CrossRef->new;
418 0         0 $r->parse_text($doi);
419 0 0       0 if ($r->score>=1) {
420 0 0 0     0 if (!defined $r->authcount || $r->authcount==0) {
421             # shouldn't happen, but sometimes doi data lacks authors so use orcid data
422 0         0 $self->_orcid_setauth($r,$cite,$entry);
423             }
424 0         0 $self->append($r);
425 0         0 next; # move on
426             }
427             }
428             # use title etc to search.crossref.org
429 0         0 my $date; my $atitle; my $jtitle;
  0         0  
430 0 0       0 if (exists $cite->{'publication-date'}->{'year'}) {$date = $cite->{'publication-date'}->{'year'};}
  0         0  
431 0 0       0 if (exists $cite->{'work-title'}->{'title'}) {$atitle = $cite->{'work-title'}->{'title'};}
  0         0  
432 0 0       0 if (exists $cite->{'journal-title'}) {$jtitle = $cite->{'journal-title'};}
  0         0  
433 0         0 my $auth = _orcid_getauth($cite);
434 0         0 my $temp=$auth.' '.$date.' '.$atitle.' '.$jtitle;
435 0 0 0     0 if (length($temp)>10 && length($date)>0 && length($atitle)+length($auth)>0) { # we have a potentially useful search string
      0        
436 0         0 my $r = Bib::CrossRef->new;
437 0         0 $r->parse_text($temp);
438 0 0       0 if ($r->score >= 1) {
439             # found an ok match, lets use it
440 0         0 $self->append($r);
441 0         0 next; # move on
442             }
443             }
444             # for a poor match, try to extract rest of info from orcid
445 0         0 my $r = Bib::CrossRef->new;
446 0         0 $r->_setdate($date); $r->_setatitle($atitle); $r->_setjtitle($jtitle);
  0         0  
  0         0  
447 0 0       0 if (exists $cite->{'work-type'}) {$r->_setgenre($cite->{'work-type'});}
  0         0  
448 0         0 $self->_orcid_setauth($r,$cite);
449 0         0 $self->_bibtex_parse($r,$entry);
450 0         0 $r->_setscore(1);
451 0         0 $r->_setquery($temp);
452             # add manually constructed record
453 0         0 $self->append($r);
454             }
455 0         0 $self->_split_duplicates();
456             } else {
457 0         0 $self->_err("Problem with orcid.org: ".$res->status_line);
458             }
459             }
460              
461             ####################################################################################
462             sub _find_pubmed {
463 0     0   0 my $c = shift @_;
464 0         0 my $name = shift @_;
465 0         0 my $term = shift @_;
466 0         0 foreach my $item (@{$c}) {
  0         0  
467 0 0       0 if ($item->{'Name'} eq $name) {
468 0         0 return $item->{$term};
469             }
470             }
471 0         0 return undef;
472             }
473              
474             ####################################################################################
475             sub add_pubmed {
476             # add results from a pubmed query
477 0     0 1 0 my ($self,$q) = @_;
478              
479 0         0 my $ua = LWP::UserAgent->new;
480 0         0 $q =~ s/\s+/+/g;
481 0         0 my $req = HTTP::Request->new(GET => "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?usehistory=y&db=pubmed&term=".$q);
482 0         0 my $res = $ua->request($req);
483 0 0       0 if ($res->is_success) {
484 0 0       0 my $web = $1 if ($res->decoded_content =~ /(\S+)<\/WebEnv>/);
485 0 0       0 my $key = $1 if ($res->decoded_content =~ /(\d+)<\/QueryKey>/);
486 0         0 $req = HTTP::Request->new(GET => "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&query_key=$key&WebEnv=$web");
487 0         0 $res = $ua->request($req);
488 0 0       0 if ($res->is_success) {
489 0         0 my $xs = XML::Simple->new();
490 0         0 my $data = $xs->XMLin($res->decoded_content);
491 0         0 my @cites = $data->{'DocSum'};
492 0         0 foreach my $cite (@{$cites[0]}) {
  0         0  
493 0         0 my $c = $cite->{'Item'};
494 0 0       0 if (ref($c) ne "ARRAY") {next;}
  0         0  
495 0         0 my $r = Bib::CrossRef->new;
496 0         0 my $doi = _find_pubmed($c,'DOI','content');
497 0 0       0 if (defined $doi) {
498             # PubMed is reliable, no need to call crossref
499             # my $r = Bib::CrossRef->new;
500             # $r->parse_text($doi);
501             # $self->append($r);
502             # next; # move on
503 0         0 $r->_setdoi($doi);
504 0         0 $r->_seturl('http://dx.doi.org/'.$doi);
505             }
506 0         0 $r->_setjtitle(_find_pubmed($c,'FullJournalName','content'));
507 0         0 $r->_setatitle(_find_pubmed($c,'Title','content'));
508 0         0 my $date = _find_pubmed($c,'PubDate','content');
509 0         0 $date =~ m/^([0-9][0-9][0-9][0-9])/;
510 0         0 $r->_setdate($1); # extract the year
511 0         0 $r->_setvolume(_find_pubmed($c,'Volume','content'));
512 0         0 $r->_setissue(_find_pubmed($c,'Issue','content'));
513 0         0 my $p = _find_pubmed($c,'Pages','content');
514 0         0 my @bits = split('-',$p);
515 0         0 $r->_setspage($bits[0]); $r->_setepage($bits[1]);
  0         0  
516            
517 0         0 my $aulist = _find_pubmed($c,'AuthorList','Item');
518 0         0 my $authcount=0;
519 0 0       0 if (ref($aulist) ne "ARRAY") {
520 0         0 $authcount = 1;
521 0         0 $r->_setauth($authcount,$aulist->{'content'});
522             } else {
523 0         0 foreach my $au (@{$aulist}) {
  0         0  
524 0         0 $authcount++;
525 0         0 $r->_setauth($authcount,$au->{'content'});
526             }
527             }
528 0         0 $r->_setauthcount($authcount);
529 0         0 my $g = _find_pubmed($c,'FullJournalName','Item');
530 0         0 $r->_setgenre($g->{'content'});
531 0         0 $r->_setscore(1);
532             #$r->_setquery($auth." ".$temp);
533             # add manually constructed record
534 0         0 $self->append($r);
535             }
536             }
537 0         0 $self->_split_duplicates();
538 0         0 return;
539             }
540 0         0 $self->_err("Problem with http://eutils.ncbi.nlm.nih.gov: ".$res->status_line);
541             }
542              
543             ####################################################################################
544             sub _rem_brackets {
545             # remove {} brackets from bibtex entry
546 6     6   91 my $self = shift @_; my $str = shift @_;
  6         9  
547            
548 6         12 $str =~ s/[\{\}]//g;
549 6         10 $str =~ s/\\textquotesingle/\\'/g;
550 6         25 return $str
551             }
552              
553             ####################################################################################
554             sub _bibtex_setauth {
555             # use author list array to populate our author details
556 1     1   3 my $self = shift @_; my $r = shift @_; my $entry = shift @_;
  1         2  
  1         2  
557              
558 1 50 33     10 if ((!defined $r) || (!defined $entry) || (!$entry->parse_ok)) {return;}
  0   33     0  
559              
560 1         11 my @authors = $entry->author;
561 1         14 my $count = 0;
562 1         3 foreach my $author (@authors) {
563 2         10 $count++;
564 2         8 $r->_setauth($count,$self->_rem_brackets($author->first).' '.$self->_rem_brackets($author->last));
565             };
566 1         20 $r->_setauthcount($count);
567             }
568              
569             ####################################################################################
570             sub _bibtex_parse {
571             # use data in a bibtext entry to populate citation
572 1     1   3 my $self = shift @_; my $r = shift @_; my $entry = shift @_;
  1         2  
  1         14  
573            
574 1 50 33     12 if ((!defined $r) || (!defined $entry) || (!$entry->parse_ok)) {return;}
  0   33     0  
575            
576 1         12 my $genre = lc($entry->type);
577 1 50 33     35 if (($genre eq "inproceedings") || ($genre eq "proceedings")) {
578 0         0 $r->_setgenre("proceeding");
579             } else {
580 1         6 $r->_setgenre($genre);
581             }
582 1 50       24 if (defined $entry->field('title')) { $r->_setatitle($self->_rem_brackets($entry->field('title')));}
  1         15  
583 1 50       11 if (defined $entry->author) {$self->_bibtex_setauth($r,$entry);}
  1         499  
584 1 50       11 if (defined $entry->field('url')) {$r->_seturl($entry->field('url'))};
  1         12  
585 1 50       19 if (defined $entry->field('year')) {$r->_setdate($entry->field('year'))};
  1         11  
586 1 50       13 if (defined $entry->field('volume')) {$r->_setvolume($entry->field('volume'))};
  1         11  
587 1 50       13 if (defined $entry->field('issue')) {$r->_setissue($entry->field('issue'))};
  0         0  
588 1 50       10 if (defined $entry->field('pages')) {
589 0         0 my $pages = $entry->field('pages');
590 0         0 (my $s, my $e) = ($pages =~ /([0-9]+)-+([0-9]+)/ );
591 0 0       0 if (defined $s) { $r->_setspage($s); }
  0         0  
592 0 0       0 if (defined $e) { $r->_setepage($e); }
  0         0  
593             }
594 1 50       9 if (defined $entry->field('journal')) {
    0          
595 1         20 $r->_setjtitle($self->_rem_brackets($entry->field('journal')));
596             } elsif (defined $entry->field('booktitle')) {
597 0         0 $r->_setjtitle($self->_rem_brackets($entry->field('booktitle')));
598             }
599 1 50       9 if (defined $entry->field('doi')) {$r->_setdoi($entry->field('doi'));}
  0         0  
600             }
601              
602             ####################################################################################
603             sub add_bibtex {
604             # read references from a bibtex file, takes file handle as input
605 1     1 1 389 my $self = shift @_; my $fh = shift @_;
  1         3  
606 1         2 my $opt = shift @_; # options, =1 then use crossref to try to resolve DOIs
607 1 50       5 if (!defined $opt) {$opt=1;} # $opt defaults to 1
  1         2  
608            
609 1         11 my $parser = BibTeX::Parser->new($fh);
610 1         31 while (my $entry = $parser->next ) {
611 1 50       1446 if (!$entry->parse_ok) { next; } # problem, move on
  0         0  
612 1         10 my $doi = $entry->field('doi');
613 1 0 33     12 if ((defined $doi) && (length($doi)>5) && ($opt)) {
      33        
614             # we have a DOI and $opt != 0, use DOI to get rest of citation
615 0         0 my $r = Bib::CrossRef->new;
616 0         0 $r->parse_text($doi);
617 0 0       0 if ($r->score >=1) {
618 0 0 0     0 if (!defined $r->authcount || $r->authcount==0) {
619             # shouldn't happen, but sometimes doi data lacks authors. since we have bibtex data, use it
620 0         0 $self->_bibtex_setauth($r,$entry);
621             }
622 0         0 $self->append($r);
623 0         0 next; # move on
624             }
625             }
626 1         7 my $r = Bib::CrossRef->new;
627             # use bibtex to create citation
628 1         49 $self->_bibtex_parse($r,$entry);
629 1         11 $r->_setscore(1);
630 1         7 $r->_setquery($r->date." ".$r->atitle." ".$r->jtitle);
631             # add manually constructed record
632 1         22 $self->append($r);
633             }
634 1         21 $self->_split_duplicates();
635             }
636              
637             ####################################################################################
638             sub add_fromfile {
639             # read free text references from a file, one reference per line
640             # takes file handle as input
641 0     0 1 0 my $self = shift @_;
642 0         0 my $fh = shift @_;
643 0         0 my @cites;
644 0         0 while (my $line=<$fh>) {
645 0         0 chomp($line);
646 0 0       0 if (length($line)<5) {next;} # skip non-informative lines
  0         0  
647 0         0 push @cites, $line;
648             }
649 0         0 $self->add_details(@cites);
650             }
651              
652             ####################################################################################
653             sub num {
654             # number of references with DOIs
655 8     8 1 14 my $self = shift @_;
656            
657 8         11 my $len = @{$self->{refs}};
  8         14  
658 8         26 return $len;
659             }
660              
661             sub num_nodoi {
662             # number of references without DOIs
663 5     5 1 109 my $self = shift @_;
664            
665 5         6 my $len = @{$self->{nodoi_refs}};
  5         9  
666 5         17 return $len;
667             }
668              
669             ####################################################################################
670             sub getref {
671             # get i'th reference with a DOI
672 7     7 1 14 my ($self, $i) = @_;
673 7         10 return ${$self->{refs}}[$i];
  7         40  
674             }
675              
676             ####################################################################################
677             sub getref_nodoi {
678             # get i'th reference without a DOI
679 2     2 1 6 my ($self, $i) = @_;
680            
681 2         3 return ${$self->{nodoi_refs}}[$i];
  2         10  
682             }
683              
684             ####################################################################################
685             sub print {
686             # display a list of references
687 2     2 1 457 my $self = shift @_;
688 2         5 my $id = shift @_;
689            
690 2 50       5 if ($self->num==0) {return ''};
  0         0  
691 2         4 my $out='';
692 2 100       8 if ($self->{html}) {$out.=$self->getref(0)->printheader($id);}
  1         11  
693 2         24 for (my $i=0; $i< $self->num; $i++) {
694 2 100       7 if ($self->{html}) {$self->getref($i)->sethtml;} else {$self->getref($i)->clearhtml;}
  1         7  
  1         4  
695 2         10 $out .= $self->getref($i)->print($i+1);
696 2         335 $out .= "\n";
697             }
698 2 100       7 if ($self->{html}) {$out.=$self->getref(0)->printfooter;}
  1         4  
699 2         12 return $out;
700             }
701              
702             ####################################################################################
703             sub print_nodoi {
704             # display a list of references
705 1     1 1 3 my $self = shift @_;
706 1         2 my $id = shift @_;
707            
708 1 50       2 if ($self->num_nodoi==0) {return ''};
  0         0  
709 1         3 my $out='';
710 1 50       3 if ($self->{html}) {$out.=$self->getref_nodoi(0)->printheader($id);}
  0         0  
711 1         5 for (my $i=0; $i< $self->num_nodoi; $i++) {
712 1 50       4 if ($self->{html}) {$self->getref_nodoi($i)->sethtml;} else {$self->getref_nodoi($i)->clearhtml;}
  0         0  
  1         5  
713 1         15 $out .= $self->getref_nodoi($i)->print($i+1);
714             }
715 1 50       5 if ($self->{html}) {$out.=$self->getref_nodoi(0)->printfooter;}
  0         0  
716 1         4 return $out;
717             }
718              
719             ####################################################################################
720             sub send_resp {
721             # generate simple web page with results ...
722            
723 1     1 1 3 my $self = shift @_;
724              
725 1 50 33     4 if ($self->num==0 && $self->num_nodoi==0) {return 'No Results'};
  0         0  
726 1         2 my $html = $self->{html};
727 1         5 $self->sethtml; # force use of html
728 1         3 my $out='';
729             #$out.="Content-Type: text/html;\n\n"; # html header
730 1         46 $out.=sprintf "%s", '',"\n";
731 1         6 $out.=sprintf "%s", '';
732 1         13 $out.=sprintf "%s", '',"\n";
733 1         6 $out.=sprintf "%s", $self->print('doi');
734 1 50       4 if ($self->num_nodoi>0) {
735 0         0 $out.=sprintf "%s", '

These have no DOIs:

',"\n";
736 0         0 $out.=sprintf "%s", $self->print_nodoi('nodoi');
737             }
738 1         3 $out.=sprintf "%s", '
';
739 1         3 $out.=sprintf "%s", '';
740 1         2 $self->{html} = $html; # restore previous setting
741 1         13 return $out;
742             }
743              
744             1;
745              
746             =pod
747            
748             =head1 NAME
749            
750             Bib::Tools - For managing collections of Bib::CrossRef references.
751            
752             =head1 SYNOPSIS
753              
754             use strict;
755             use Bib::Tools;
756            
757             # Create a new object
758              
759             my $refs = Bib::Tools->new();
760            
761             # Add some bibliometric info e.g. as text, one reference per line
762              
763             $text=<<"END";
764             10.1109/lcomm.2011.040111.102111
765             10.1109/tnet.2010.2051038
766             END
767             open $fh, '<', \$text;
768             $refs->add_fromfile($fh);
769            
770             or
771            
772             $text=<<"END";
773             Dangerfield, I., Malone, D., Leith, D.J., 2011, Incentivising fairness and policing nodes in WiFi, IEEE Communications Letters, 15(5), pp500-502
774             D. Giustiniano, D. Malone, D.J. Leith and K. Papagiannaki, 2010. Measuring transmission opportunities in 802.11 links. IEEE/ACM Transactions on Networking, 18(5), pp1516-1529
775             END
776             open $fh, '<', \$text;
777             $refs->add_fromfile($fh);
778              
779             # or as text scraped from a google scholar personal home page
780              
781             $refs->add_google('http://scholar.google.com/citations?user=n8dX1fUAAAAJ');
782              
783             # or as text obtained from ORCID (www.orcid.org)
784              
785             $refs->add_orcid('0000-0003-4056-4014');
786              
787             # or as text from PubMed
788              
789             $refs->add_pubmed('mills kh[author]');
790            
791             # or as text from DBLP
792              
793             $refs->add_dblp('http://www.informatik.uni-trier.de/~ley/pers/xx/l/Leith:Douglas_J=');
794              
795             # Bib:Tools will use Bib:CrossRef to try to resolve the supplied text into full citations. It will try to
796             detect duplicates using DOI information, so its fairly safe to import from multiple sources without creating
797             clashes. Full citations without DOI information are kept separately from those with a DOI for better quality
798             control.
799              
800             # The resulting list of full citations containing DOI's can be printed out in human readable form using
801              
802             print $refs->print;
803              
804             # and the list of full citations without DOI's
805              
806             print $refs->print_nodoi;
807              
808             # or the complete citation list can also be output as a simple web page using
809              
810             print $refs->send_resp;
811              
812             =head1 METHODS
813            
814             =head2 new
815              
816             my $refs = Bib::Tools->new();
817              
818             Creates a new Bib::Tools object. Queries to crossref via Bib::CrossRef are rate limited. To change the ratelimit pass this as
819             an option to new e.g $refs = Bib::Tools->new(3) sets the rate limit to 3 queries per second.
820              
821             =head2 add_google
822              
823             $refs->add_google($url);
824            
825             Scrapes citation information from a google scholar personal home page (*not* a search page, see below) and tries
826             resolve it into full citations using crossref.
827              
828             =head2 add_google_search
829              
830             $refs->add_google_search($url);
831            
832             Scrapes citation information from a google scholar search page and tries to resolve into full citations. A different
833             method is needed for search and home pages due to the different html tags used.
834              
835             =head2 add_orcid
836              
837             $refs->add_orcid($orcid_id);
838            
839             Uses the ORCID API to extract citations for the specified user identifier. If possible, the DOI is obtained and then resolved using crossref.
840              
841             =head2 add_dblp
842              
843             $refs->add_dblp($url);
844            
845             Uses DBLP XML API to extract citations. If possible, the DOI is obtained and then resolved using crossref. E.g.
846              
847             $refs->add_dblp('http://www.informatik.uni-trier.de/~ley/pers/xx/l/Leith:Douglas_J=');
848            
849             =head2 add_pubmed
850            
851             $refs->add_dblp($query);
852            
853             Uses PubMed API to extract citations listed in response to a query. E.g.
854              
855             $refs->add_pubmed('mills kh[author]');
856            
857             =head2 add_details
858              
859             $refs->add_details(@lines);
860            
861             Given a array of strings, one per citation, tries to resolve these into full citations.
862              
863             =head2 add_bibtex
864              
865             $refs->add_bibtex($fh, $opt);
866            
867             Given a file handle to a file containing bibtex entries, imports these citations. If a citation has a DOI and $opt is non-zero (the default), this
868             will be used to try to obtain the full citation from crossref.org.
869              
870             =head2 add_fromfile
871              
872             $refs->add_fromfile($fh);
873            
874             Given a file handle to a text file, with one citation per line, tries to resolve these into full citations.
875            
876             =head2 print
877              
878             my $info = $refs->print;
879              
880             Display the list of full citations that have DOIs in human readable form.
881              
882             =head2 print_nodoi
883              
884             my $info = $refs->print_nodoi;
885              
886             Display the list of full citations without DOIs in human readable form.
887              
888             =head2 sethtml
889              
890             $refs->sethtml
891            
892             Set the output format to be html
893              
894             =head2 clearhtml
895              
896             $refs->clearhtml
897            
898             Set the output format to be plain text
899              
900             =head2 send_resp
901              
902             my $info = $refs->send_resp;
903              
904             =head2 num
905              
906             my $num = $refs->num;
907            
908             Returns the number of full citations that have DOIs
909              
910             =head2 num_nodoi
911              
912             my $num = $refs->num_nodoi;
913            
914             Returns the number of full citations without DOIs
915              
916             =head2 getref
917              
918             my $ref = $refs->getref($i);
919            
920             Returns the $i citation from the list with DOIs. This can be used to walk the list of citations.
921              
922             =head2 getref_nodoi
923              
924             my $ref = $refs->getref_nodoi($i);
925              
926             Returns the $i citation from the list without DOIs
927              
928             =head2 append
929              
930             my $ref = Bib::CrossRef->new;
931             $refs->append($ref);
932              
933             Adds a Bib::CrossRef to end of a Bib::Tools list of objects
934              
935             =head1 EXPORTS
936            
937             You can export the following functions if you do not want to use the object orientated interface:
938              
939             sethtml clearhtml add_details add_google add_google_search add_orcid add_fromfile add_dblp add_pubmed
940             send_resp print print_nodoi num num_nodoi getref getref_nodoi append add_bibtex
941              
942             The tag C is available to easily export everything:
943            
944             use Bib::Tools qw(:all);
945              
946             =head1 WEB INTERFACE
947              
948             A simple web interface to Bib::Tools is contained in the examples folder. This consists of three files: query.html, handle_query.pl and post.js.
949              
950             =head2 query.html
951              
952            
953            
954            
955            

Import References

956            
957            
958            
Use ORCID id:
e.g. 0000-0003-4056-4014
959            

960            
(to import from Scopus, follow these instructions, and for Web of Science/ResearcherId follow these)
961              
962            
Use DBLP XML page:
963             e.g. http://www.informatik.uni-trier.de/~ley/pers/xx/l/Leith:Douglas_J=
964            
965              
966            
Use PubMed query:
e.g. mills kh[author]
967            
968              
969            
BibTeX file:
970            
971              
972            
Use Google Scholar personal page:
973             e.g. http://scholar.google.com/citations?user=n8dX1fUAAAAJ
974            

975              
976            
Use Google Scholar search page:
977             e.g. http://scholar.google.com/scholar?q=andr%C3%A9s+garcia+saavedra
978            

979              
980            
981            

Enter references, one per line (free form text):

982            
983             (can be slow, be patient)
984            
985            
Source: Bib::Tools
986            
987            
988             =head2 handle_query.pl
989              
990             #!/usr/bin/perl
991             use Bib::CrossRef;
992             use Bib::Tools;
993             use CGI;
994              
995             # send html header
996             print "Content-Type: text/html;\n\n";
997              
998             my $q = CGI->new;
999             my $refs = Bib::Tools->new;
1000             my $orcid = scalar $q->param('orcid');
1001             $orcid =~ /([0-9\-]+)$/; # extract id out of url
1002             $orcid = $1;
1003             if (length($orcid) > 5) {
1004             $refs->add_orcid($1);
1005             }
1006              
1007             my $google = scalar $q->param('google'); #NB: CGI has already carried out URL decoding
1008             if (length($google) > 5) {
1009             if (!($google =~ m/^http/)) { $google = "http://".$google;}
1010             $refs->add_google($google);
1011             }
1012              
1013             my $google2 = scalar $q->param('google2'); #NB: CGI has already carried out URL decoding
1014             if (length($google2) > 5) {
1015             if (!($google2 =~ m/^http/)) { $google2 = "http://".$google2;}
1016             $refs->add_google_search($google2);
1017             }
1018              
1019             my $dblp = scalar $q->param('dblp');
1020             if (length($dblp) > 5) {
1021             if (!($dblp =~ m/^http/)) { $dblp = "http://".$dblp;}
1022             if ($dblp =~ m/http:\/\/dblp.uni-trier.de\/pers\/xx\/l\/.+/) {
1023             # looks like a valid dblp url
1024             $refs->add_dblp($dblp);
1025             } else {
1026             print "

DBLP url looks invalid: ", $dblp,"

";
1027             }
1028             }
1029              
1030             my $pubmed = scalar $q->param('pubmed');
1031             if (length($pubmed) > 5) {
1032             $refs->add_pubmed($pubmed);
1033             }
1034              
1035             $filename = scalar $q->param('bibtex');
1036             $tmpfilename = $q->tmpFileName($filename);
1037             open my $fh, "<", $tmpfilename;
1038             $refs->add_bibtex($fh);
1039            
1040             my @values = $q->multi_param('refs');
1041             foreach my $value (@values) {
1042             open my $fh, "<", \$value; #NB: CGI has already carried out URL decoding
1043             $refs->add_fromfile($fh);
1044             }
1045              
1046             $refs->sethtml;
1047             print $refs->send_resp;
1048              
1049             =head2 post.js
1050              
1051             function GetCellValues(dataTable) {
1052             var table = document.getElementById(dataTable);
1053             if (table == null) return;
1054             var i = 0; var Obj = [];
1055             var names = table.rows[0];
1056             for (var r = 1; r < table.rows.length; r++) {
1057             if (table.rows[r].id == 'cite') {
1058             var row = table.rows[r].cells;
1059             var check = table.rows[r].getElementsByTagName('Input');
1060             if (check.length>0){
1061             Obj[i] = {};
1062             for (var c = 3; c < row.length; c++){
1063             var tag = names.cells[c].textContent;
1064             Obj[i][tag] =row[c].textContent;
1065             }
1066             i = i+1;
1067             }
1068             }
1069             }
1070             var jsonString = JSON.stringify(Obj);
1071             document.getElementById('out').innerHTML = document.getElementById('out').innerHTML+jsonString;
1072             // or POST using ajax
1073             }
1074              
1075             =head1 VERSION
1076            
1077             Ver 0.15
1078            
1079             =head1 AUTHOR
1080            
1081             Doug Leith
1082            
1083             =head1 BUGS
1084            
1085             Please report any bugs or feature requests to C, or through the web interface at L. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes.
1086            
1087             =head1 COPYRIGHT
1088            
1089             Copyright 2015 D.J.Leith.
1090            
1091             This program is free software; you can redistribute it and/or modify it under the terms of either: the GNU General Public License as published by the Free Software Foundation; or the Artistic License.
1092            
1093             See http://dev.perl.org/licenses/ for more information.
1094            
1095             =cut
1096              
1097              
1098             __END__