File Coverage

blib/lib/Bib/Tools.pm

Criterion	Covered	Total	%
statement	191	543	35.1
branch	33	194	17.0
condition	11	60	18.3
subroutine	30	43	69.7
pod	19	19	100.0
total	284	859	33.0

line

stmt

bran

cond

sub

pod

time

code

############################################################

# Bib::Tools - For managing collections of Bib::CrossRef references.

############################################################

package Bib::Tools;

70451

use 5.8.8;

use strict;

use warnings;

no warnings 'uninitialized';

require Exporter;

559

use Bib::CrossRef;

84803

use LWP::UserAgent;

use JSON qw/decode_json/;

105

use URI::Escape qw(uri_escape_utf8 uri_unescape);

use HTML::Entities qw(decode_entities encode_entities);

651

use HTML::TreeBuilder::XPath;

61411

use XML::Simple qw(XMLin);

634

use BibTeX::Parser qw(new next);

12050

595

use IO::File;

9379

113

use vars qw($VERSION @EXPORT @EXPORT_OK %EXPORT_TAGS @ISA);

6194

#use LWP::Protocol::https;

#use Data::Dumper;

$VERSION = '0.17';

@ISA = qw(Exporter);

@EXPORT = qw();

@EXPORT_OK = qw(

sethtml clearhtml add_details add_google add_google_search add_orcid add_fromfile add_dblp add_pubmed

send_resp print print_nodoi num num_nodoi getref getref_nodoi append add_bibtex

);

%EXPORT_TAGS = (all => \@EXPORT_OK);

####################################################################################

sub new {

1756

my $self;

# defaults

$self->{refs} = []; # the references

$self->{nodoi_refs} = [];

$self->{duprefs} = [];

$self->{html}=0;

$self->{ratelimit}=5; # limit of 5 crossref queries per sec

$self->{last} = {};

bless $self;

my $ratelimit = $_[1];

if (defined($ratelimit) && ($ratelimit>=0)) {$self->{ratelimit}=$ratelimit};

return $self;

}

####################################################################################

sub sethtml {

my $self = shift @_;

$self->{html}=1;

}

####################################################################################

sub clearhtml {

my $self = shift @_;

$self->{html}=0;

}

####################################################################################

sub _err {

my ($self, $str) = @_;

if ($self->{html}) {

print "

",$str,"

} else {

print $str,"\n";

}

####################################################################################

sub _split_duplicates {

# split list of references into three lists: one with refs that have unique doi's, one with no doi's

#and one with all the rest (with duplicate doi's)

my $self = shift @_;

my @refs=@{$self->{refs}};

my @newrefs;

foreach my $ref (@refs) {

my $doi = $ref->doi();

100

if (!defined($doi) || length($doi)==0) {push @{$self->{nodoi_refs}}, $ref; next; }# skip entries with no DOI

my $found = 0;

foreach my $ref2 (@newrefs) {

if ($ref2->doi() eq $doi) {

$found = 1;

}

100

if (!$found) {

push @newrefs, $ref;

} else {

push @{$self->{duprefs}}, $ref;

}

100

$self->{refs} = \@newrefs;

101

}

102

103

####################################################################################

104

sub append {

105

# add new reference to end of existing list

106

1557

my $self = shift @_;

107

my $ref = shift @_;

108

push @{$self->{refs}}, $ref;

109

}

110

111

####################################################################################

112

sub add_details {

113

# given an array of raw strings, try to convert into paper references

114

115

380

my $self = shift @_;

116

foreach my $cites (@_) {

117

$self->{last} = Bib::CrossRef->new();

118

$self->{last}->parse_text($cites);

119

15674

$self->append($self->{last});

120

199

sleep 1/(0.001+$self->{ratelimit}); # rate limit queries to crossref

121

}

122

$self->_split_duplicates();

123

}

124

125

####################################################################################

126

sub add_google {

127

# scrape paper details from google scholar personal page -- nb: no doi info on google, so use crossref.org to obtain this

128

# nb: doesn't work with google scholar search results

129

130

my $self = shift @_;

131

my $url = shift @_;

132

my $ua = LWP::UserAgent->new;

133

$ua->agent('Mozilla/5.0');

134

my $req = HTTP::Request->new(GET => $url);

135

my $res = $ua->request($req);

136

if ($res->is_success) {

137

my $tree= HTML::TreeBuilder::XPath->new;

138

$tree->parse($res->decoded_content);

139

my @atitles=$tree->findvalues('//tr[@class="gsc_a_tr"]/td/a[@class="gsc_a_at"]');

140

my @authors=$tree->findvalues('//tr[@class="gsc_a_tr"]/td/div[@class="gs_gray"][1]');

141

my @jtitles=$tree->findvalues('//tr[@class="gsc_a_tr"]/td/div[@class="gs_gray"][2]');

142

my $len1 = @atitles; my $len2 = @authors; my $len3 = @jtitles;

143

if (($len1 != $len2) || ($len1 != $len3) || ($len2 != $len3)) {$self->_err("Problem parsing google page: mismatched $len1 titles/$len2 authors/$len3 journals.");return []}

144

for (my $i = 0; $i<$len1; $i++) {

145

# these are already utf8

146

$authors[$i] = decode_entities($authors[$i]);

147

$atitles[$i] = decode_entities($atitles[$i]);

148

$jtitles[$i] = decode_entities($jtitles[$i]);

149

my $temp = $authors[$i].", ".$atitles[$i].", ".$jtitles[$i];

150

my $r = Bib::CrossRef->new;

151

$r->parse_text($temp);

152

$jtitles[$i] =~ m/\s([0-9][0-9][0-9][0-9])$/;

153

my $year=$1;

154

if ((length($year)==4) && ($r->date ne $1)) {

155

$r->_setscore(0.5); # mismatch in year, probably bad

156

}

157

$self->append($r);

158

}

159

} else {

160

$self->_err("Problem with $url: ".$res->status_line);

161

}

162

}

163

164

####################################################################################

165

sub add_google_search {

166

# scrape paper details from google scholar search results -- *not* from persons scholar home page

167

168

my $self = shift @_;

169

my $url = shift @_;

170

my $ua = LWP::UserAgent->new;

171

$ua->agent('Mozilla/5.0');

172

my $req = HTTP::Request->new(GET => $url);

173

my $res = $ua->request($req);

174

if ($res->is_success) {

175

my $tree= HTML::TreeBuilder::XPath->new;

176

$tree->parse($res->decoded_content);

177

my @atitles=$tree->findvalues('//div[@class="gs_ri"]/h3/a');

178

my @authors=$tree->findvalues('//div[@class="gs_a"]');

179

my $len1 = @atitles; my $len2 = @authors;

180

if ($len1 != $len2) {$self->_err("Problem parsing google page: mismatched $len1 titles/$len2 authors.");return [];}

181

my @cites=();

182

for (my $i = 0; $i<$len1; $i++) {

183

$authors[$i] = decode_entities($authors[$i]);

184

$atitles[$i] = decode_entities($atitles[$i]);

185

my $str = $authors[$i].", ".$atitles[$i];

186

if (length($str)>5) { # a potentially useful entry ?

187

push @cites, $authors[$i].", ".$atitles[$i];

188

}

189

}

190

$self->add_details(@cites);

191

} else {

192

$self->_err("Problem with $url: ".$res->status_line);

193

}

194

}

195

196

####################################################################################

197

sub _dblp_setauth {

198

my $self = shift @_; my $r = shift @_; my $cite = shift @_;

199

200

if (ref($cite->{'author'}) eq "HASH") {

201

$r->_setauthcount(1);

202

$r->_setauth(1,$cite->{'author'});

203

} else {

204

my $count = 0;

205

foreach my $au (@{$cite->{'author'}}) {

206

$count++;

207

$r->_setauth($count, $au);

208

}

209

$r->_setauthcount($count);

210

}

211

}

212

213

####################################################################################

214

sub add_dblp {

215

# get details using DBLP XML API

216

217

my $self = shift @_;

218

my $url = shift @_;

219

my $maxnum = shift @_; if (!defined($maxnum)) {$maxnum=-1;}

220

221

my $ua = LWP::UserAgent->new;

222

$ua->agent('Mozilla/5.0');

223

my $req = HTTP::Request->new(GET => $url);

224

my $res = $ua->request($req);

225

if ($res->is_success) {

226

my $xs = XML::Simple->new();

227

my $data = $xs->XMLin($res->decoded_content);

228

my @cites; my @ctemp;

229

if (defined $data->{'r'}) {

230

# a person page

231

@cites = $data->{'r'};

232

} elsif (defined $data->{'article'}) {

233

# its xml for a single article

234

$ctemp[0] = $data;

235

push @cites, \@ctemp;

236

}

237

my $num=0;

238

foreach my $c (@{$cites[0]}) {

239

$num++; if ($maxnum>0 && $num>$maxnum) {last;} # mainly for testing

240

my @k = keys %{$c};

241

my $cite = $c->{$k[0]};

242

my $ee = $cite->{'ee'};

243

if ($ee =~ m/dx.doi.org/) {

244

# we have a DOI, lets call crossref

245

$ee =~ s/http:\/\/dx.doi.org\///;

246

my $r = Bib::CrossRef->new;

247

$r->parse_text($ee);

248

if ($r->score >=1) {

249

if (!defined $r->authcount || $r->authcount==0) {

250

# shouldn't happen, but sometimes doi data lacks authors so use dblp data

251

$self->_dblp_setauth($r,$cite);

252

}

253

$self->append($r);

254

next; # move on to next record

255

}

256

}

257

my $jtitle='';

258

if (defined $cite->{'journal'}) {

259

$jtitle = $cite->{'journal'};

260

} elsif (defined $cite->{'booktitle'}) {

261

$jtitle = $cite->{'booktitle'};

262

}

263

my $temp = $cite->{'year'}.' '.$cite->{'title'}.' '.$jtitle. ' ';

264

if (ref($cite->{'author'}) eq "HASH") {

265

$temp .= $cite->{'author'};

266

} else {

267

foreach my $au (@{$cite->{'author'}}) { $temp .= $au.", ";}

268

}

269

my $r = Bib::CrossRef->new;

270

$r->parse_text($temp);

271

if ($r->score >= 1) {

272

# found an ok match, lets use it

273

$self->append($r);

274

next; # move on

275

}

276

277

# we got a poor match, lets use the rest of the dblp data

278

$r = Bib::CrossRef->new;

279

if (exists $cite->{'publtype'}) {

280

$r->_setgenre($cite->{'publtype'});

281

} elsif ($k[0] =~ m/article/) {

282

$r->_setgenre('article');

283

} elsif ($k[0] =~ m/inproceedings/) {

284

$r->_setgenre('proceeding');

285

} elsif ($k[0] =~ m/informal/) {

286

$r->_setgenre('preprint');

287

} else {

288

$r->_setgenre($k[0]);

289

}

290

$r->_setdate($cite->{'year'}); $r->_setatitle($cite->{'title'}); $r->_setjtitle($jtitle);

291

$self->_dblp_setauth($r,$cite);

292

if (defined $cite->{'volume'}) {$r->_setvolume($cite->{'volume'});}

293

if (defined $cite->{'number'}) {$r->_setissue($cite->{'number'});}

294

if (defined $cite->{'pages'}) {

295

my @bits = split('-',$cite->{'pages'});

296

if (defined $bits[0]) {$r->_setspage($bits[0]);}

297

if (defined $bits[1]) {$r->_setepage($bits[1]);}

298

}

299

if (($cite->{'ee'} =~ m/^http:\/\//)) {$r->_seturl($cite->{'ee'});}

300

$r->_setscore(1);

301

$r->_setquery($temp);

302

# add manually constructed record

303

$self->append($r);

304

}

305

$self->_split_duplicates();

306

} else {

307

$self->_err("Problem with $url: ".$res->status_line);

308

}

309

}

310

311

####################################################################################

312

sub _orcid_getdoi {

313

# extract DOI from an orcid entry

314

my $cite = shift @_;

315

316

my $doi='';

317

my $c = $cite->{'work-external-identifiers'}->{'work-external-identifier'};

318

if (ref($c) eq "HASH") {

319

# a single value

320

if ($c->{'work-external-identifier-type'} =~ m/doi/) {

321

# and its a DOI

322

$doi = $c->{'work-external-identifier-id'};

323

}

324

} else {

325

# multiple values

326

foreach my $id (@{$c}) {

327

if ($id->{'work-external-identifier-type'} =~ m/doi/) {

328

# its a DOI

329

$doi = $id->{'work-external-identifier-id'};

330

last; # exit loop

331

}

332

}

333

}

334

return $doi;

335

}

336

337

####################################################################################

338

sub _orcid_getauth {

339

# generate an author string from an orcid entry

340

my $cite = shift @_;

341

342

my $auth='';

343

my $c = $cite->{'work-contributors'}->{'contributor'};

344

if (ref($c) eq "HASH") {

345

# single author

346

if ($c->{'contributor-attributes'}->{'contributor-role'} =~ m/author/) {$auth = $c->{'credit-name'}->{'content'};}

347

} else {

348

# multiple authors

349

foreach my $au (@{$c}) {

350

if ($au->{'contributor-attributes'}->{'contributor-role'} =~ m/author/) {$auth .= $au->{'credit-name'}->{'content'}.", ";}

351

}

352

}

353

return $auth;

354

}

355

356

####################################################################################

357

sub _orcid_setauth {

358

# use an orcid entry to set citation author list (using orcid bibtex data if appropriate)

359

my $self = shift @_; my $r = shift @_;

360

my $cite = shift @_; my $entry = shift @_;

361

362

my $c = $cite->{'work-contributors'}->{'contributor'};

363

if (defined $c) {

364

# we have an orcid author entry

365

my $authcount=0;

366

if (ref($c) eq "HASH") {

367

# single author

368

if ($c->{'contributor-attributes'}->{'contributor-role'} =~ m/author/) {

369

$authcount++;

370

$r->_setauth($authcount,$c->{'credit-name'}->{'content'});

371

}

372

} else {

373

# multiple authors

374

foreach my $au (@{$c}) {

375

if ($au->{'contributor-attributes'}->{'contributor-role'} =~ m/author/) {

376

$authcount++;

377

$r->_setauth($authcount,$au->{'credit-name'}->{'content'});

378

}

379

}

380

}

381

$r->_setauthcount($authcount);

382

if ($authcount>0) {return;} # found some authors, finish up

383

}

384

# no author info, lets see if bibtex has any author info

385

if (defined $entry) {

386

$self->_bibtex_setauth($r,$entry);

387

}

388

}

389

390

####################################################################################

391

sub add_orcid {

392

# get paper details from orcid using API

393

394

my $self = shift @_; my $orcid_id = shift @_;

395

396

my $ua = LWP::UserAgent->new;

397

my $req = HTTP::Request->new(GET => "http://pub.orcid.org/$orcid_id/orcid-works/");

398

my $res = $ua->request($req);

399

if ($res->is_success) {

400

my $xs = XML::Simple->new();

401

# the orcid response is utf8 xml

402

my $data = $xs->XMLin($res->decoded_content);

403

my @cites = $data->{'orcid-profile'}->{'orcid-activities'}->{'orcid-works'}->{'orcid-work'};

404

foreach my $cite (@{$cites[0]}) {

405

my $entry = undef;

406

if ($cite->{'work-citation'}->{'work-citation-type'} =~ m/bibtex/) {

407

# we have a bibtex reference, extract some extra info

408

my $bibtex = $cite->{'work-citation'}->{'citation'};

409

open my $fh, '<', \$bibtex;

410

my $parser = BibTeX::Parser->new($fh);

411

$entry = $parser->next;

412

if (!$entry->parse_ok) {$entry = undef;}

413

}

414

my $doi = _orcid_getdoi($cite);

415

if ((defined $doi) && (length($doi)>5)) { # we seem to have a DOI

416

# use DOI to search.crossref.org

417

my $r = Bib::CrossRef->new;

418

$r->parse_text($doi);

419

if ($r->score>=1) {

420

if (!defined $r->authcount || $r->authcount==0) {

421

# shouldn't happen, but sometimes doi data lacks authors so use orcid data

422

$self->_orcid_setauth($r,$cite,$entry);

423

}

424

$self->append($r);

425

next; # move on

426

}

427

}

428

# use title etc to search.crossref.org

429

my $date; my $atitle; my $jtitle;

430

if (exists $cite->{'publication-date'}->{'year'}) {$date = $cite->{'publication-date'}->{'year'};}

431

if (exists $cite->{'work-title'}->{'title'}) {$atitle = $cite->{'work-title'}->{'title'};}

432

if (exists $cite->{'journal-title'}) {$jtitle = $cite->{'journal-title'};}

433

my $auth = _orcid_getauth($cite);

434

my $temp=$auth.' '.$date.' '.$atitle.' '.$jtitle;

435

if (length($temp)>10 && length($date)>0 && length($atitle)+length($auth)>0) { # we have a potentially useful search string

436

my $r = Bib::CrossRef->new;

437

$r->parse_text($temp);

438

if ($r->score >= 1) {

439

# found an ok match, lets use it

440

$self->append($r);

441

next; # move on

442

}

443

}

444

# for a poor match, try to extract rest of info from orcid

445

my $r = Bib::CrossRef->new;

446

$r->_setdate($date); $r->_setatitle($atitle); $r->_setjtitle($jtitle);

447

if (exists $cite->{'work-type'}) {$r->_setgenre($cite->{'work-type'});}

448

$self->_orcid_setauth($r,$cite);

449

$self->_bibtex_parse($r,$entry);

450

$r->_setscore(1);

451

$r->_setquery($temp);

452

# add manually constructed record

453

$self->append($r);

454

}

455

$self->_split_duplicates();

456

} else {

457

$self->_err("Problem with orcid.org: ".$res->status_line);

458

}

459

}

460

461

####################################################################################

462

sub _find_pubmed {

463

my $c = shift @_;

464

my $name = shift @_;

465

my $term = shift @_;

466

foreach my $item (@{$c}) {

467

if ($item->{'Name'} eq $name) {

468

return $item->{$term};

469

}

470

}

471

return undef;

472

}

473

474

####################################################################################

475

sub add_pubmed {

476

# add results from a pubmed query

477

my ($self,$q) = @_;

478

479

my $ua = LWP::UserAgent->new;

480

$q =~ s/\s+/+/g;

481

my $req = HTTP::Request->new(GET => "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?usehistory=y&db=pubmed&term=".$q);

482

my $res = $ua->request($req);

483

if ($res->is_success) {

484

my $web = $1 if ($res->decoded_content =~ /(\S+)<\/WebEnv>/);

485

my $key = $1 if ($res->decoded_content =~ /(\d+)<\/QueryKey>/);

486

$req = HTTP::Request->new(GET => "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&query_key=$key&WebEnv=$web");

487

$res = $ua->request($req);

488

if ($res->is_success) {

489

my $xs = XML::Simple->new();

490

my $data = $xs->XMLin($res->decoded_content);

491

my @cites = $data->{'DocSum'};

492

foreach my $cite (@{$cites[0]}) {

493

my $c = $cite->{'Item'};

494

if (ref($c) ne "ARRAY") {next;}

495

my $r = Bib::CrossRef->new;

496

my $doi = _find_pubmed($c,'DOI','content');

497

if (defined $doi) {

498

# PubMed is reliable, no need to call crossref

499

# my $r = Bib::CrossRef->new;

500

# $r->parse_text($doi);

501

# $self->append($r);

502

# next; # move on

503

$r->_setdoi($doi);

504

$r->_seturl('http://dx.doi.org/'.$doi);

505

}

506

$r->_setjtitle(_find_pubmed($c,'FullJournalName','content'));

507

$r->_setatitle(_find_pubmed($c,'Title','content'));

508

my $date = _find_pubmed($c,'PubDate','content');

509

$date =~ m/^([0-9][0-9][0-9][0-9])/;

510

$r->_setdate($1); # extract the year

511

$r->_setvolume(_find_pubmed($c,'Volume','content'));

512

$r->_setissue(_find_pubmed($c,'Issue','content'));

513

my $p = _find_pubmed($c,'Pages','content');

514

my @bits = split('-',$p);

515

$r->_setspage($bits[0]); $r->_setepage($bits[1]);

516

517

my $aulist = _find_pubmed($c,'AuthorList','Item');

518

my $authcount=0;

519

if (ref($aulist) ne "ARRAY") {

520

$authcount = 1;

521

$r->_setauth($authcount,$aulist->{'content'});

522

} else {

523

foreach my $au (@{$aulist}) {

524

$authcount++;

525

$r->_setauth($authcount,$au->{'content'});

526

}

527

}

528

$r->_setauthcount($authcount);

529

my $g = _find_pubmed($c,'FullJournalName','Item');

530

$r->_setgenre($g->{'content'});

531

$r->_setscore(1);

532

#$r->_setquery($auth." ".$temp);

533

# add manually constructed record

534

$self->append($r);

535

}

536

}

537

$self->_split_duplicates();

538

return;

539

}

540

$self->_err("Problem with http://eutils.ncbi.nlm.nih.gov: ".$res->status_line);

541

}

542

543

####################################################################################

544

sub _rem_brackets {

545

# remove {} brackets from bibtex entry

546

my $self = shift @_; my $str = shift @_;

547

548

$str =~ s/[\{\}]//g;

549

$str =~ s/\\textquotesingle/\\'/g;

550

return $str

551

}

552

553

####################################################################################

554

sub _bibtex_setauth {

555

# use author list array to populate our author details

556

my $self = shift @_; my $r = shift @_; my $entry = shift @_;

557

558

if ((!defined $r) || (!defined $entry) || (!$entry->parse_ok)) {return;}

559

560

my @authors = $entry->author;

561

my $count = 0;

562

foreach my $author (@authors) {

563

$count++;

564

$r->_setauth($count,$self->_rem_brackets($author->first).' '.$self->_rem_brackets($author->last));

565

};

566

$r->_setauthcount($count);

567

}

568

569

####################################################################################

570

sub _bibtex_parse {

571

# use data in a bibtext entry to populate citation

572

my $self = shift @_; my $r = shift @_; my $entry = shift @_;

573

574

if ((!defined $r) || (!defined $entry) || (!$entry->parse_ok)) {return;}

575

576

my $genre = lc($entry->type);

577

if (($genre eq "inproceedings") || ($genre eq "proceedings")) {

578

$r->_setgenre("proceeding");

579

} else {

580

$r->_setgenre($genre);

581

}

582

if (defined $entry->field('title')) { $r->_setatitle($self->_rem_brackets($entry->field('title')));}

583

if (defined $entry->author) {$self->_bibtex_setauth($r,$entry);}

499

584

if (defined $entry->field('url')) {$r->_seturl($entry->field('url'))};

585

if (defined $entry->field('year')) {$r->_setdate($entry->field('year'))};

586

if (defined $entry->field('volume')) {$r->_setvolume($entry->field('volume'))};

587

if (defined $entry->field('issue')) {$r->_setissue($entry->field('issue'))};

588

if (defined $entry->field('pages')) {

589

my $pages = $entry->field('pages');

590

(my $s, my $e) = ($pages =~ /([0-9]+)-+([0-9]+)/ );

591

if (defined $s) { $r->_setspage($s); }

592

if (defined $e) { $r->_setepage($e); }

593

}

594

if (defined $entry->field('journal')) {

595

$r->_setjtitle($self->_rem_brackets($entry->field('journal')));

596

} elsif (defined $entry->field('booktitle')) {

597

$r->_setjtitle($self->_rem_brackets($entry->field('booktitle')));

598

}

599

if (defined $entry->field('doi')) {$r->_setdoi($entry->field('doi'));}

600

}

601

602

####################################################################################

603

sub add_bibtex {

604

# read references from a bibtex file, takes file handle as input

605

389

my $self = shift @_; my $fh = shift @_;

606

my $opt = shift @_; # options, =1 then use crossref to try to resolve DOIs

607

if (!defined $opt) {$opt=1;} # $opt defaults to 1

608

609

my $parser = BibTeX::Parser->new($fh);

610

while (my $entry = $parser->next ) {

611

1446

if (!$entry->parse_ok) { next; } # problem, move on

612

my $doi = $entry->field('doi');

613

if ((defined $doi) && (length($doi)>5) && ($opt)) {

614

# we have a DOI and $opt != 0, use DOI to get rest of citation

615

my $r = Bib::CrossRef->new;

616

$r->parse_text($doi);

617

if ($r->score >=1) {

618

if (!defined $r->authcount || $r->authcount==0) {

619

# shouldn't happen, but sometimes doi data lacks authors. since we have bibtex data, use it

620

$self->_bibtex_setauth($r,$entry);

621

}

622

$self->append($r);

623

next; # move on

624

}

625

}

626

my $r = Bib::CrossRef->new;

627

# use bibtex to create citation

628

$self->_bibtex_parse($r,$entry);

629

$r->_setscore(1);

630

$r->_setquery($r->date." ".$r->atitle." ".$r->jtitle);

631

# add manually constructed record

632

$self->append($r);

633

}

634

$self->_split_duplicates();

635

}

636

637

####################################################################################

638

sub add_fromfile {

639

# read free text references from a file, one reference per line

640

# takes file handle as input

641

my $self = shift @_;

642

my $fh = shift @_;

643

my @cites;

644

while (my $line=<$fh>) {

645

chomp($line);

646

if (length($line)<5) {next;} # skip non-informative lines

647

push @cites, $line;

648

}

649

$self->add_details(@cites);

650

}

651

652

####################################################################################

653

sub num {

654

# number of references with DOIs

655

my $self = shift @_;

656

657

my $len = @{$self->{refs}};

658

return $len;

659

}

660

661

sub num_nodoi {

662

# number of references without DOIs

663

109

my $self = shift @_;

664

665

my $len = @{$self->{nodoi_refs}};

666

return $len;

667

}

668

669

####################################################################################

670

sub getref {

671

# get i'th reference with a DOI

672

my ($self, $i) = @_;

673

return ${$self->{refs}}[$i];

674

}

675

676

####################################################################################

677

sub getref_nodoi {

678

# get i'th reference without a DOI

679

my ($self, $i) = @_;

680

681

return ${$self->{nodoi_refs}}[$i];

682

}

683

684

####################################################################################

685

sub print {

686

# display a list of references

687

457

my $self = shift @_;

688

my $id = shift @_;

689

690

if ($self->num==0) {return ''};

691

my $out='';

692

100

if ($self->{html}) {$out.=$self->getref(0)->printheader($id);}

693

for (my $i=0; $i< $self->num; $i++) {

694

100

if ($self->{html}) {$self->getref($i)->sethtml;} else {$self->getref($i)->clearhtml;}

695

$out .= $self->getref($i)->print($i+1);

696

335

$out .= "\n";

697

}

698

100

if ($self->{html}) {$out.=$self->getref(0)->printfooter;}

699

return $out;

700

}

701

702

####################################################################################

703

sub print_nodoi {

704

# display a list of references

705

my $self = shift @_;

706

my $id = shift @_;

707

708

if ($self->num_nodoi==0) {return ''};

709

my $out='';

710

if ($self->{html}) {$out.=$self->getref_nodoi(0)->printheader($id);}

711

for (my $i=0; $i< $self->num_nodoi; $i++) {

712

if ($self->{html}) {$self->getref_nodoi($i)->sethtml;} else {$self->getref_nodoi($i)->clearhtml;}

713

$out .= $self->getref_nodoi($i)->print($i+1);

714

}

715

if ($self->{html}) {$out.=$self->getref_nodoi(0)->printfooter;}

716

return $out;

717

}

718

719

####################################################################################

720

sub send_resp {

721

# generate simple web page with results ...

722

723

my $self = shift @_;

724

725

if ($self->num==0 && $self->num_nodoi==0) {return 'No Results'};

726

my $html = $self->{html};

727

$self->sethtml; # force use of html

728

my $out='';

729

#$out.="Content-Type: text/html;\n\n"; # html header

730

$out.=sprintf "%s", '',"\n";

731

$out.=sprintf "%s", '';

732

$out.=sprintf "%s", '',"\n";

733

$out.=sprintf "%s", $self->print('doi');

734

if ($self->num_nodoi>0) {

735

$out.=sprintf "%s", '

These have no DOIs:

',"\n";

736

$out.=sprintf "%s", $self->print_nodoi('nodoi');

737

}

738

$out.=sprintf "%s", '

739

$out.=sprintf "%s", '';

740

$self->{html} = $html; # restore previous setting

741

return $out;

742

}

743

744

745

746

=pod

747

748

=head1 NAME

749

750

Bib::Tools - For managing collections of Bib::CrossRef references.

751

752

=head1 SYNOPSIS

753

754

use strict;

755

use Bib::Tools;

756

757

# Create a new object

758

759

my $refs = Bib::Tools->new();

760

761

# Add some bibliometric info e.g. as text, one reference per line

762

763

$text=<<"END";

764

10.1109/lcomm.2011.040111.102111

765

10.1109/tnet.2010.2051038

766

END

767

open $fh, '<', \$text;

768

$refs->add_fromfile($fh);

769

770

771

772

$text=<<"END";

773

Dangerfield, I., Malone, D., Leith, D.J., 2011, Incentivising fairness and policing nodes in WiFi, IEEE Communications Letters, 15(5), pp500-502

774

D. Giustiniano, D. Malone, D.J. Leith and K. Papagiannaki, 2010. Measuring transmission opportunities in 802.11 links. IEEE/ACM Transactions on Networking, 18(5), pp1516-1529

775

END

776

open $fh, '<', \$text;

777

$refs->add_fromfile($fh);

778

779

# or as text scraped from a google scholar personal home page

780

781

$refs->add_google('http://scholar.google.com/citations?user=n8dX1fUAAAAJ');

782

783

# or as text obtained from ORCID (www.orcid.org)

784

785

$refs->add_orcid('0000-0003-4056-4014');

786

787

# or as text from PubMed

788

789

$refs->add_pubmed('mills kh[author]');

790

791

# or as text from DBLP

792

793

$refs->add_dblp('http://www.informatik.uni-trier.de/~ley/pers/xx/l/Leith:Douglas_J=');

794

795

# Bib:Tools will use Bib:CrossRef to try to resolve the supplied text into full citations. It will try to

796

detect duplicates using DOI information, so its fairly safe to import from multiple sources without creating

797

clashes. Full citations without DOI information are kept separately from those with a DOI for better quality

798

control.

799

800

# The resulting list of full citations containing DOI's can be printed out in human readable form using

801

802

print $refs->print;

803

804

# and the list of full citations without DOI's

805

806

print $refs->print_nodoi;

807

808

# or the complete citation list can also be output as a simple web page using

809

810

print $refs->send_resp;

811

812

=head1 METHODS

813

814

=head2 new

815

816

my $refs = Bib::Tools->new();

817

818

Creates a new Bib::Tools object. Queries to crossref via Bib::CrossRef are rate limited. To change the ratelimit pass this as

819

an option to new e.g $refs = Bib::Tools->new(3) sets the rate limit to 3 queries per second.

820

821

=head2 add_google

822

823

$refs->add_google($url);

824

825

Scrapes citation information from a google scholar personal home page (*not* a search page, see below) and tries

826

resolve it into full citations using crossref.

827

828

=head2 add_google_search

829

830

$refs->add_google_search($url);

831

832

Scrapes citation information from a google scholar search page and tries to resolve into full citations. A different

833

method is needed for search and home pages due to the different html tags used.

834

835

=head2 add_orcid

836

837

$refs->add_orcid($orcid_id);

838

839

Uses the ORCID API to extract citations for the specified user identifier. If possible, the DOI is obtained and then resolved using crossref.

840

841

=head2 add_dblp

842

843

$refs->add_dblp($url);

844

845

Uses DBLP XML API to extract citations. If possible, the DOI is obtained and then resolved using crossref. E.g.

846

847

$refs->add_dblp('http://www.informatik.uni-trier.de/~ley/pers/xx/l/Leith:Douglas_J=');

848

849

=head2 add_pubmed

850

851

$refs->add_dblp($query);

852

853

Uses PubMed API to extract citations listed in response to a query. E.g.

854

855

$refs->add_pubmed('mills kh[author]');

856

857

=head2 add_details

858

859

$refs->add_details(@lines);

860

861

Given a array of strings, one per citation, tries to resolve these into full citations.

862

863

=head2 add_bibtex

864

865

$refs->add_bibtex($fh, $opt);

866

867

Given a file handle to a file containing bibtex entries, imports these citations. If a citation has a DOI and $opt is non-zero (the default), this

868

will be used to try to obtain the full citation from crossref.org.

869

870

=head2 add_fromfile

871

872

$refs->add_fromfile($fh);

873

874

Given a file handle to a text file, with one citation per line, tries to resolve these into full citations.

875

876

=head2 print

877

878

my $info = $refs->print;

879

880

Display the list of full citations that have DOIs in human readable form.

881

882

=head2 print_nodoi

883

884

my $info = $refs->print_nodoi;

885

886

Display the list of full citations without DOIs in human readable form.

887

888

=head2 sethtml

889

890

$refs->sethtml

891

892

Set the output format to be html

893

894

=head2 clearhtml

895

896

$refs->clearhtml

897

898

Set the output format to be plain text

899

900

=head2 send_resp

901

902

my $info = $refs->send_resp;

903

904

=head2 num

905

906

my $num = $refs->num;

907

908

Returns the number of full citations that have DOIs

909

910

=head2 num_nodoi

911

912

my $num = $refs->num_nodoi;

913

914

Returns the number of full citations without DOIs

915

916

=head2 getref

917

918

my $ref = $refs->getref($i);

919

920

Returns the $i citation from the list with DOIs. This can be used to walk the list of citations.

921

922

=head2 getref_nodoi

923

924

my $ref = $refs->getref_nodoi($i);

925

926

Returns the $i citation from the list without DOIs

927

928

=head2 append

929

930

my $ref = Bib::CrossRef->new;

931

$refs->append($ref);

932

933

Adds a Bib::CrossRef to end of a Bib::Tools list of objects

934

935

=head1 EXPORTS

936

937

You can export the following functions if you do not want to use the object orientated interface:

938

939

sethtml clearhtml add_details add_google add_google_search add_orcid add_fromfile add_dblp add_pubmed

940

send_resp print print_nodoi num num_nodoi getref getref_nodoi append add_bibtex

941

942

The tag C is available to easily export everything:

943

944

use Bib::Tools qw(:all);

945

946

=head1 WEB INTERFACE

947

948

A simple web interface to Bib::Tools is contained in the examples folder. This consists of three files: query.html, handle_query.pl and post.js.

949

950

=head2 query.html

951

952

953

954

955

Import References

956

957

958
Use ORCID id: e.g. 0000-0003-4056-4014
959
960
(to import from Scopus, follow these instructions, and for Web of Science/ResearcherId follow these)
961
962
Use DBLP XML page:
963							e.g. http://www.informatik.uni-trier.de/~ley/pers/xx/l/Leith:Douglas_J=
964
965
966
Use PubMed query: e.g. mills kh[author]
967
968
969
BibTeX file:
970
971
972
Use Google Scholar personal page:
973							e.g. http://scholar.google.com/citations?user=n8dX1fUAAAAJ
974
975
976
Use Google Scholar search page:
977							e.g. http://scholar.google.com/scholar?q=andr%C3%A9s+garcia+saavedra
978
979
980

981

Enter references, one per line (free form text):

982

983

(can be slow, be patient)

984

985

Source: Bib::Tools

986

987

988

=head2 handle_query.pl

989

990

#!/usr/bin/perl

991

use Bib::CrossRef;

992

use Bib::Tools;

993

use CGI;

994

995

# send html header

996

print "Content-Type: text/html;\n\n";

997

998

my $q = CGI->new;

999

my $refs = Bib::Tools->new;

1000

my $orcid = scalar $q->param('orcid');

1001

$orcid =~ /([0-9\-]+)$/; # extract id out of url

1002

$orcid = $1;

1003

if (length($orcid) > 5) {

1004

$refs->add_orcid($1);

1005

}

1006

1007

my $google = scalar $q->param('google'); #NB: CGI has already carried out URL decoding

1008

if (length($google) > 5) {

1009

if (!($google =~ m/^http/)) { $google = "http://".$google;}

1010

$refs->add_google($google);

1011

}

1012

1013

my $google2 = scalar $q->param('google2'); #NB: CGI has already carried out URL decoding

1014

if (length($google2) > 5) {

1015

if (!($google2 =~ m/^http/)) { $google2 = "http://".$google2;}

1016

$refs->add_google_search($google2);

1017

}

1018

1019

my $dblp = scalar $q->param('dblp');

1020

if (length($dblp) > 5) {

1021

if (!($dblp =~ m/^http/)) { $dblp = "http://".$dblp;}

1022

if ($dblp =~ m/http:\/\/dblp.uni-trier.de\/pers\/xx\/l\/.+/) {

1023

# looks like a valid dblp url

1024

$refs->add_dblp($dblp);

1025

} else {

1026

print "

DBLP url looks invalid: ", $dblp,"

1027

}

1028

}

1029

1030

my $pubmed = scalar $q->param('pubmed');

1031

if (length($pubmed) > 5) {

1032

$refs->add_pubmed($pubmed);

1033

}

1034

1035

$filename = scalar $q->param('bibtex');

1036

$tmpfilename = $q->tmpFileName($filename);

1037

open my $fh, "<", $tmpfilename;

1038

$refs->add_bibtex($fh);

1039

1040

my @values = $q->multi_param('refs');

1041

foreach my $value (@values) {

1042

open my $fh, "<", \$value; #NB: CGI has already carried out URL decoding

1043

$refs->add_fromfile($fh);

1044

}

1045

1046

$refs->sethtml;

1047

print $refs->send_resp;

1048

1049

=head2 post.js

1050

1051

function GetCellValues(dataTable) {

1052

var table = document.getElementById(dataTable);

1053

if (table == null) return;

1054

var i = 0; var Obj = [];

1055

var names = table.rows[0];

1056

for (var r = 1; r < table.rows.length; r++) {

1057

if (table.rows[r].id == 'cite') {

1058

var row = table.rows[r].cells;

1059

var check = table.rows[r].getElementsByTagName('Input');

1060

if (check.length>0){

1061

Obj[i] = {};

1062

for (var c = 3; c < row.length; c++){

1063

var tag = names.cells[c].textContent;

1064

Obj[i][tag] =row[c].textContent;

1065

}

1066

i = i+1;

1067

}

1068

}

1069

}

1070

var jsonString = JSON.stringify(Obj);

1071

document.getElementById('out').innerHTML = document.getElementById('out').innerHTML+jsonString;

1072

// or POST using ajax

1073

}

1074

1075

=head1 VERSION

1076

1077

Ver 0.15

1078

1079

=head1 AUTHOR

1080

1081

Doug Leith

1082

1083

=head1 BUGS

1084

1085

Please report any bugs or feature requests to C, or through the web interface at L. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes.

1086

1087

=head1 COPYRIGHT

1088

1089

1090

1091

This program is free software; you can redistribute it and/or modify it under the terms of either: the GNU General Public License as published by the Free Software Foundation; or the Artistic License.

1092

1093

See http://dev.perl.org/licenses/ for more information.

1094

1095

=cut

1096

1097

1098

__END__