File Coverage

blib/lib/Pod/2/DocBook.pm

Criterion	Covered	Total	%
statement	392	462	84.8
branch	193	246	78.4
condition	51	82	62.2
subroutine	34	35	97.1
pod	11	11	100.0
total	681	836	81.4

line

stmt

bran

cond

sub

pod

time

code

package Pod::2::DocBook;

=head1 NAME

Pod::2::DocBook - Convert Pod data to DocBook SGML

=head1 SYNOPSIS

use Pod::2::DocBook;

my $parser = Pod::2::DocBook->new(

title => 'My Article',

doctype => 'article',

base_id => 'article42'

fix_double_quotes => 1,

spaces => 3,

id_version => 2,

);

$parser->parse_from_file ('my_article.pod', 'my_article.sgml');

=head1 DESCRIPTION

Pod::2::DocBook is a module for translating Pod-formatted documents to

DocBook 4.2 SGML (see L). It is primarily a

back end for B, but, as a Pod::Parser subclass, it can be

used on its own. The only public extensions to the Pod::Parser

interface are options available to C:

=over

=item doctype

This option sets the output document's doctype. The currently

supported types are B

, B, B and

. Special processing is performed when the doctype is set

to B (see L). You I set this option

in order to get valid DocBook output.

=item fix_double_quotes

If this option is set to a true value, pairs of double quote

characters ('"') in ordinary paragraphs will be replaced with

BquoteE> and B/quoteE>. See L

Paragraphs> for details.

=item header

If this option is set to a true value, Pod::2::DocBook will emit a

DOCTYPE as the first line of output.

=item spaces

Pod::2::DocBook produces pretty-printed output. This option sets the

number of spaces per level of indentation in the output.

=item title

This option sets the output document's title.

=back

The rest of this document only describes issues specific to

Pod::2::DocBook; for details on invoking the parser, specifically the

C, C and C methods,

see L.

=cut

107565

use 5.006001;

168

use strict;

141

use warnings;

150

use Digest::MD5 'md5_hex';

583

use Pod::Parser;

185

5476

use Pod::ParseLink;

4670

243

5605

use Text::ParseWords;

7694

341

5108

use Text::Wrap;

22884

320

4737

use List::MoreUtils 'any';

8973

388

=head1 METHODS

use base 'Pod::Parser';

=cut

use base 'Pod::Parser';

41730

our $VERSION = '0.03';

my $SPACE = q{ };

my $DOUBLE_QUOTE = q{"};

#----------------------------------------------------------------------

# overridden Pod::Parser methods

#----------------------------------------------------------------------

=head2 initialize()

Initialize parser.

100

=cut

101

102

sub initialize {

103

16297

my $parser = shift;

104

105

161

$parser->errorsub('error_msg');

106

$parser->{'Pod::2::DocBook::errors'} = [];

107

108

100

$parser->{title} ||= q{};

109

100

$parser->{spaces} ||= 0;

110

109

$parser->{id_version} ||= 1;

111

my $skip = $parser->{skip};

112

$parser->{skip} = [];

113

100

push @{$parser->{skip}}, split(/\s*,\s*/, $skip || '')

169

114

if $parser->{skip};

115

116

# if base_id not set, put title as base_id or a random number in worst case

117

123

$parser->{base_id} ||= $parser->{title} || q{:}._big_random_number();

118

$parser->{base_id} = $parser->cleanup_id($parser->{base_id});

119

120

286

return;

121

}

122

123

=head2 begin_pod()

124

125

Output docbook header stuff.

126

127

=cut

128

129

sub begin_pod {

130

3229

my ($parser) = @_;

131

116

my $out_fh = $parser->output_handle();

132

133

100

171

print $out_fh <<"END_HEADER" if $parser->{header};

134

{doctype}

135

PUBLIC "-//OASIS//DTD DocBook V4.2//EN"

136

"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" >

137

END_HEADER

138

139

256

print $out_fh join("\n",

140

'"),

148

"\n";

149

150

$parser->{indentlevel} = 1;

151

152

if ($parser->{doctype} eq 'refentry') {

153

print $out_fh join(q{},

154

"\n", $parser->_indent(),

155

"\n", $parser->_current_indent(),

156

"$parser->{title}", "\n",

157

$parser->_outdent(), "\n");

158

}

159

else {

160

print $out_fh '<',

161

$parser->{doctype},

162

($parser->{base_id} ? ' id="' . $parser->{base_id} . $DOUBLE_QUOTE : ()),

163

'>', </td> </tr> <tr> <td class="h" > <a name="164">164</a> </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > </td> <td class="s"> $parser->{title}, </td> </tr> <tr> <td class="h" > <a name="165">165</a> </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > </td> <td class="s"> '',

166

"\n";

167

}

168

169

1281

return;

170

}

171

172

=head2 end_pod()

173

174

Output docbook footer. Will print also errors if any in a comment block.

175

176

=cut

177

178

sub end_pod {

179

my ($parser) = @_;

180

my $out_fh = $parser->output_handle();

181

182

$parser->_transition('THE END');

183

184

# end document

185

print $out_fh "{doctype}>\n";

186

100

if (@{ $parser->{'Pod::2::DocBook::errors'} }) {

187

print $out_fh "\n\n";

195

}

196

197

1520

return;

198

}

199

200

=head2 commans($command, $paragraph, $line_num)

201

202

Process POD commands.

203

204

=cut

205

206

sub command {

207

103

178

my ($parser, $command, $paragraph, $line_num) = @_;

208

103

322

my $out_fh = $parser->output_handle();

209

210

103

100

699

return if $command eq 'pod';

211

212

# check if we need to skip this heading

213

100

189

if ($command =~ /^head[1-4]/xms) {

214

$parser->{'skip_current'} = (

215

100

(any { $paragraph =~ m/^$_/ } @{$parser->{'skip'}})

211

216

? 1

217

: 0

218

);

219

}

220

221

398

$paragraph =~ s/\s+$//sx;

222

4081

$paragraph = $parser->interpolate($paragraph, $line_num);

223

224

# For blocks must be considered before we escape entries, otherwise

225

# docbook markup will get mangled.

226

227

100

225

if ($command eq 'for') {

228

$parser->_transition('for');

229

100

if ($paragraph =~ /^(:\S+|docbook)/xms) {

230

$paragraph =~ s/$1\s+//xms;

231

print $out_fh $paragraph, "\n";

232

}

233

234

# If we've processed a docbook 'for', then we're done.

235

# If we've process any other 'for', then it wasn't

236

# intended for us, and we're also done.

237

186

return;

238

}

239

240

# Now escape SGML-escape our text, and figure out what to do

241

# with it.

242

243

145

$paragraph = _fix_chars($paragraph);

244

245

100

417

if ($command =~ /^head[1-4]/xms) {

100

246

$parser->_transition($command);

247

248

100

143

return if $parser->{'skip_current'};

249

$parser->_handle_head($command, $paragraph, $line_num);

250

}

251

elsif ($command eq 'begin') {

252

$parser->_transition("begin $paragraph");

253

push(@{ $parser->{'Pod::2::DocBook::state'} }, "begin $paragraph");

254

}

255

elsif ($command eq 'end') {

256

$parser->_transition("end $paragraph");

257

}

258

elsif ($command eq 'over') {

259

$parser->_transition('over');

260

push @{ $parser->{'Pod::2::DocBook::state'} }, 'over';

261

}

262

elsif ($command eq 'item') {

263

$parser->_transition('item');

264

$parser->_handle_item($paragraph, $line_num);

265

}

266

elsif ($command =~ /^back/xms) {

267

$parser->_transition('back');

268

}

269

else {

270

my $file = $parser->input_file();

271

$parser->error_msg("unknown command `$command' at",

272

"line $line_num in file $file");

273

}

274

275

3709

return;

276

}

277

278

=head2 textblock ($paragraph, $line_num)

279

280

Process text block.

281

282

=cut

283

284

sub textblock {

285

127

my ($parser, $paragraph, $line_num) = @_;

286

223

my $out_fh = $parser->output_handle();

287

my $state = pop @{ $parser->{'Pod::2::DocBook::state'} };

150

288

my $para_out = q{};

289

290

100

176

$state = q{} unless defined $state;

291

100

369

$paragraph =~ s/\s+$//xms unless $state eq 'begin docbook';

292

293

100

182

$paragraph =~ s/&/&/xmsg unless $state eq 'begin docbook';

294

295

100

316

unless ($state eq 'begin docbook' || $state eq 'begin table') {

296

4462

$paragraph = $parser->interpolate($paragraph, $line_num);

297

165

$paragraph = _fix_chars($paragraph);

298

}

299

300

421

if ($state eq 'name') {

100

301

my ($name, $purpose) = split(/\s*-\s*/xms, $paragraph, 2);

302

303

$para_out = join(q{},

304

$parser->_indent(),

305

"\n",

306

$parser->_current_indent(),

307

"$name\n",

308

"$purpose\n",

309

$parser->_outdent(),

310

"\n");

311

}

312

elsif ($state eq 'synopsis+') {

313

$para_out = join(q{},

314

$parser->_indent(), "\n",

315

"$paragraph\n");

316

317

push @{ $parser->{'Pod::2::DocBook::state'} }, 'synopsis';

318

}

319

elsif ($state eq 'synopsis') {

320

$para_out = "$paragraph\n";

321

push @{ $parser->{'Pod::2::DocBook::state'} }, $state;

322

}

323

elsif ($state eq 'begin docbook') {

324

push @{ $parser->{'Pod::2::DocBook::dbpara'} }, $paragraph;

325

push @{ $parser->{'Pod::2::DocBook::state'} }, $state;

326

}

327

elsif ($state eq 'begin table') {

328

$parser->_handle_table($paragraph, $line_num);

329

push @{ $parser->{'Pod::2::DocBook::state'} }, $state;

330

}

331

elsif ($state =~ /^begin\s[^:]/xms) {

332

push @{ $parser->{'Pod::2::DocBook::state'} }, $state;

333

}

334

elsif ($state eq 'over') {

335

## no critic (Variables::ProhibitPackageVars )

336

local $Text::Wrap::huge = 'overflow'; # don't break tags

337

## use critic

338

339

$paragraph =~ s/\s*\n\s*/ /gx; # don't just wrap, fill

340

341

$para_out = join(

342

q{},

343

$parser->_indent(),

344

\n",

345

$parser->_indent(),

346

"\n",

347

wrap(

348

' ' x ($parser->{spaces} * $parser->{indentlevel}),

349

' ' x ($parser->{spaces} * $parser->{indentlevel}),

350

$paragraph

351

352

"\n",

353

$parser->_outdent(),

354

"\n"

355

);

356

357

push @{ $parser->{'Pod::2::DocBook::state'} }, 'indent';

358

}

359

else {

360

## no critic (Variables::ProhibitPackageVars )

361

local $Text::Wrap::huge = 'overflow'; # don't break tags

362

## use critic

363

364

100

131

print $out_fh "]]>\n" if $state eq 'verbatim';

365

366

120

$paragraph =~ s/\s*\n\s*/ /gx; # don't just wrap, fill

367

368

{

369

$para_out = $parser->_indent;

151

370

371

140

my $padding = ' ' x ($parser->{spaces} * $parser->{indentlevel});

372

373

187

$para_out .= join q{},

374

"\n",

375

wrap($padding, $padding, $paragraph),

376

"\n",

377

$parser->_outdent,

378

"\n";

379

}

380

185

$state =~ s/\+$//xms;

381

100

339

push @{ $parser->{'Pod::2::DocBook::state'} }, $state

102

382

unless ($state eq 'verbatim' || $state eq q{});

383

}

384

385

# fix double quotes in ordinary paragraphs if asked to

386

100

564

if ($state !~ /^begin/xms && $parser->{fix_double_quotes} && $para_out =~ /"/xms)

100

387

{

388

my @protected;

389

while ($para_out =~ m#(<[^>"]*".+?>)#sx) {

390

391

# don't modify things that look like tags with quotes inside

392

my $protect = $1 || $2;

393

my $replace = quotemeta($protect);

394

395

$para_out =~ s/$replace/\376/xms;

396

push @protected, $protect;

397

}

398

399

$para_out =~ s!"(.+?)"!$1!sgx;

400

foreach my $protect (@protected) {

401

$para_out =~ s/\376/$protect/xms;

402

}

403

}

404

405

100

214

print $out_fh $para_out

406

if not $parser->{'skip_current'};

407

408

4464

return;

409

}

410

411

=head2 verbatim($paragraph, $line_num)

412

413

Process verbatim text block.

414

415

=cut

416

417

sub verbatim {

418

my ($parser, $paragraph, $line_num) = @_;

419

my $out_fh = $parser->output_handle();

420

100

my $state = pop @{ $parser->{'Pod::2::DocBook::state'} } || q{};

421

my @lines;

422

my $min_leader;

423

424

100

return if $parser->{'skip_current'};

425

426

$paragraph =~ s/\s+$//sx unless $state eq 'begin docbook';

427

428

@lines = split(/\n/xms, $paragraph);

429

foreach my $line (@lines) {

430

431

# expand tabs (see perldoc -q 'expand tabs')

432

1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/ex;

433

434

# find the minimum-length whitespace leader for this paragraph

435

my ($leader) = ($line =~ /^(\s+)/xms);

436

$leader ||= q{};

437

438

$min_leader = length($leader)

439

if ((not defined $min_leader) or (length($leader) < $min_leader));

440

}

441

442

$paragraph = join("\n", @lines);

443

444

# strip the minimum-length whitespace leader from every line

445

$paragraph =~ s/^\s{$min_leader}//gxms

446

if $min_leader;

447

448

if (!defined $state) {

100

449

print $out_fh $parser->_current_indent(), "

450

push @{ $parser->{'Pod::2::DocBook::state'} }, 'verbatim';

451

}

452

elsif ($state eq 'name') {

453

my ($name, $purpose) = split(/\s*-\s*/xms, $paragraph, 2);

454

455

$purpose ||= q{}; # $purpose can be empty

456

457

print $out_fh $parser->_indent,

458

"\n",

459

$parser->_current_indent,

460

"$name\n",

461

$parser->_current_indent,

462

"$purpose\n",

463

$parser->_outdent,

464

"\n";

465

}

466

elsif ($state eq 'synopsis+') {

467

print $out_fh join(q{},

468

$parser->_indent(), "\n",

469

"$paragraph\n");

470

471

push @{ $parser->{'Pod::2::DocBook::state'} }, 'synopsis';

472

}

473

elsif ($state eq 'synopsis') {

474

print $out_fh "$paragraph\n";

475

push @{ $parser->{'Pod::2::DocBook::state'} }, $state;

476

}

477

elsif ($state eq 'begin docbook') {

478

push @{ $parser->{'Pod::2::DocBook::dbpara'} }, $paragraph;

479

push @{ $parser->{'Pod::2::DocBook::state'} }, $state;

480

}

481

elsif ($state =~ /^begin\s[^:]/xms) {

482

push @{ $parser->{'Pod::2::DocBook::state'} }, $state;

483

}

484

elsif ($state eq 'over') {

485

print $out_fh join(q{},

486

$parser->_indent(), "

\n", $parser->_current_indent(),

487

488

489

push @{ $parser->{'Pod::2::DocBook::state'} }, 'indent';

490

push @{ $parser->{'Pod::2::DocBook::state'} }, 'verbatim';

491

}

492

elsif ($state eq 'verbatim') {

493

print $out_fh "\n\n$paragraph";

494

push @{ $parser->{'Pod::2::DocBook::state'} }, $state;

495

}

496

else {

497

print $out_fh $parser->_current_indent(), "

498

$state =~ s/\+$//xms;

499

push @{ $parser->{'Pod::2::DocBook::state'} }, $state;

500

push @{ $parser->{'Pod::2::DocBook::state'} }, 'verbatim';

501

}

502

503

259

return;

504

}

505

506

=head2 interior_sequence($command, $argument, $seq)

507

508

Process formatting commands.

509

510

=cut

511

512

sub interior_sequence {

513

my ($parser, $command, $argument, $seq) = @_;

514

my $out_fh = $parser->output_handle();

515

my $string;

516

517

# nothing is ever allowed to be nested inside of E<>, or Z<>

518

100

105

if (my $parent = $seq->nested()) {

519

if ($parent->cmd_name() eq 'E' || $parent->cmd_name() eq 'Z') {

520

my ($file, $line) = $seq->file_line();

521

$parser->error_msg(

522

"formatting code `$command' nested within",

523

"`" . $parent->cmd_name() . "'",

524

"at line $line in file $file"

525

);

526

116

return $seq->raw_text();

527

}

528

}

529

530

$argument = q{} unless defined $argument;

531

532

# the substring "\37632\377" is a space character protected

533

# against translation in S<>; other characters are protected at

534

# the end of this function, and all protected characters are

535

# de-protected in _fix_chars ()

536

537

100

108

if ($command eq 'I') {

100

538

$string = qq!$argument!;

539

}

540

elsif ($command eq 'B') {

541

$string = qq!$argument!;

542

}

543

elsif ($command eq 'C') {

544

$string = qq!!

545

. "";

546

}

547

elsif ($command eq 'L') {

548

$string = $parser->_handle_L($argument, $seq);

549

}

550

elsif ($command eq 'E') {

551

$string = $parser->_handle_E($argument, $seq);

552

}

553

elsif ($command eq 'F') {

554

$string = "$argument";

555

}

556

elsif ($command eq 'S') {

557

$argument =~ s/\s(?![^<]*>)/ /gx;

558

$string = $argument;

559

}

560

elsif ($command eq 'X') {

561

$string = "$argument";

562

}

563

elsif ($command eq 'Z') {

564

$string = q{};

565

}

566

else {

567

my ($file, $line) = $seq->file_line();

568

$parser->error_msg("unknown formatting code `$command' at line",

569

"in file $file");

570

$string = $seq->raw_text();

571

}

572

573

# protect &, <, and > characters from later processing

574

# I got this from the first edition Camel Book

575

unless ($seq->nested()) {

576

577

# just do this once, at the top of a subtree so we can

578

# report more meaningful errors along the way

579

foreach my $char ('&', '<', '>') {

580

602

$string =~ s/$char/"\376" . ord ($char) . "\377"/egx;

110

275

581

}

582

}

583

584

845

return $string;

585

}

586

587

#----------------------------------------------------------------------

588

# other public methods

589

#----------------------------------------------------------------------

590

591

=head2 error_msg

592

593

Returns parser error message(s) if any occured.

594

595

=cut

596

597

sub error_msg {

598

my ($parser, @parts) = @_;

599

600

push(@{ $parser->{'Pod::2::DocBook::errors'} }, join(' ', @parts));

601

602

return;

603

}

604

605

#----------------------------------------------------------------------

606

# private methods and helper functions

607

#----------------------------------------------------------------------

608

609

sub _indent {

610

174

207

my ($parser) = @_;

611

174

585

return (' ' x ($parser->{spaces} * $parser->{indentlevel}++));

612

}

613

614

sub _outdent {

615

174

12122

my ($parser) = @_;

616

174

615

return (' ' x (--$parser->{indentlevel} * $parser->{spaces}));

617

}

618

619

sub _current_indent {

620

my $parser = shift;

621

178

return ' ' x ($parser->{spaces} * $parser->{indentlevel});

622

}

623

624

=head2 make_id($text)

625

626

default id format -

627

628

Function will construct an element id string. Id string is composed of

629

C<< join (':', $parser->{base_id}, $text) >>, where C<$text> in most cases

630

is the pod heading text.

631

632

version 2 id format -

633

634

having ':' in id was not a best choice. (Xerces complains - Attribute value

635

"lib.Moose.Manual.pod:NAME" of type ID must be an NCName when namespaces are

636

enabled.) To not break backwards compatibity switch with F< 2>> in

637

constructor for using '-' instead.

638

639

The xml id string has strict format. Checkout L function for

640

specification.

641

642

=cut

643

644

sub make_id {

645

1150

my $parser = shift;

646

my $text = shift;

647

my $base_id = $parser->{base_id};

648

649

# trim text spaces

650

$text =~ s/^\s*//xms;$text =~ s/\s*$//xms;

1385

651

$base_id =~ s/^\s*//xms;$base_id =~ s/\s*$//xms;

153

652

653

return $parser->cleanup_id(join ('-', $base_id, $text))

654

if $parser->{'id_version'} == 2;

655

656

117

return $parser->cleanup_id(join (':', $base_id, $text));

657

}

658

659

660

=head2 make_uniq_id($text)

661

662

Calls C<< $parser->make_id($text) >> and checks if such id was already

663

generated. If so, generates new one by adding _i1 (or _i2, i3, ...) to the id

664

string. Return value is new uniq id string.

665

666

=cut

667

668

sub make_uniq_id {

669

my $parser = shift;

670

my $text = shift;

671

672

my $id_string = $parser->make_id($text);

673

674

# prevent duplicate ids

675

100

my $ids_used = $parser->{'ids_used'} || {};

676

while (exists $ids_used->{$id_string}) {

677

100

293

if ($id_string =~ m/_i(\d+)$/xms) {

678

116

my $last_used_id_index = $1;

679

398

substr($id_string, 0-length($last_used_id_index), length($id_string), $last_used_id_index + 1);

680

}

681

else {

682

$id_string .= '_i1';

683

}

684

}

685

$ids_used->{$id_string} = 1;

686

$parser->{'ids_used'} = $ids_used;

687

688

return $id_string;

689

}

690

691

sub _handle_L {

692

my ($parser, $argument, $seq) = @_;

693

my $node = $seq;

694

695

# look all the way up the subtree to see if any ancestor is an 'L'

696

while ($node = $node->nested()) {

697

if ($node->cmd_name() eq 'L') {

698

my ($file, $line) = $seq->file_line();

699

$parser->error_msg("formatting code `L' nested within `L' at",

700

"line $line in file $file");

701

return $seq->raw_text();

702

}

703

}

704

705

# the substring "\37632\377" is a space character protected

706

# against translation in S<>; other characters are protected at

707

# the end of interior_sequence (), and all protected characters

708

# are de-protected in _fix_chars ()

709

710

my ($text, $inferred, $name, $section, $type) = parselink($argument);

711

100

296

$inferred =~ s/&/&/xmsg

712

if $inferred;

713

100

$name =~ s/&/&/xmsg

714

if $name;

715

716

100

return qq!$inferred!

717

if $type eq 'url';

718

719

# types 'man' and 'pod' are handled the same way

720

100

if (defined $section && !defined $name) {

721

my $id = $parser->make_id($section);

722

723

100

$section = $text if defined $text;

724

return (qq!$section!

725

. "");

726

}

727

728

100

return $text if defined $text;

729

730

100

if (defined $name) {

731

100

my $string =

732

$name =~ /(.+?)$(.+)$/xms

733

? $parser->_manpage($1, $2)

734

: $parser->_manpage($name);

735

736

100

return defined $section

737

? "$section in $string"

738

: $string;

739

}

740

741

my ($file, $line) = $seq->file_line();

742

$parser->error_msg("empty L<> at line", "$line in file $file\n");

743

return $seq->raw_text();

744

}

745

746

sub _handle_E {

747

my ($parser, $argument, $seq) = @_;

748

749

100

if ($argument !~ /\A\w+\z/xms) {

750

my ($file, $line) = $seq->file_line();

751

$parser->error_msg("invalid escape `$argument'",

752

"at line $line in file $file\n");

753

return $seq->raw_text();

754

}

755

756

# careful! the order is important

757

return

758

100

$argument eq 'verbar' ? '|'

100

759

: $argument eq 'sol' ? '/'

760

: ( $argument eq 'lchevron'

761

or $argument eq 'laquo') ? '«'

762

: ( $argument eq 'rchevron'

763

or $argument eq 'raquo') ? '»'

764

: $argument =~ /^0x/xms ? '&#' . hex($argument) . ';'

765

: $argument =~ /^0/xms ? '&#' . oct($argument) . ';'

766

: $argument =~ /^\d+$/xms ? "&#$argument;"

767

: "&$argument;";

768

}

769

770

sub _handle_head {

771

my ($parser, $command, $paragraph, $line_num) = @_;

772

561

my $out_fh = $parser->output_handle();

773

774

if ( $parser->{doctype} eq 'refentry'

775

&& $command eq 'head1'

776

&& $paragraph eq 'NAME')

777

{

778

push @{ $parser->{'Pod::2::DocBook::state'} }, 'name';

779

}

780

elsif ($parser->{doctype} eq 'refentry'

781

&& $command eq 'head1'

782

&& $paragraph eq 'SYNOPSIS')

783

{

784

push @{ $parser->{'Pod::2::DocBook::state'} }, 'synopsis+';

785

}

786

else {

787

push @{ $parser->{'Pod::2::DocBook::state'} }, "$command+";

788

my $id = $parser->make_uniq_id($paragraph);

789

790

if ($parser->{doctype} eq 'refentry') {

791

print $out_fh $parser->_indent(),

792

qq!$paragraph\n!;

793

}

794

else {

795

print $out_fh $parser->_indent(),

796

qq!

$paragraph\n!;

797

}

798

}

799

800

return;

801

}

802

803

sub _handle_item {

804

my ($parser, $paragraph, $line_num) = @_;

805

my $out_fh = $parser->output_handle();

806

my $state = pop @{ $parser->{'Pod::2::DocBook::state'} };

807

808

100

$state = q{} unless defined $state;

809

810

if ($state eq 'verbatim') {

811

print $out_fh "]]>\n";

812

$state = pop @{ $parser->{'Pod::2::DocBook::state'} };

813

$state = q{} unless defined $state;

814

}

815

816

if ($state =~ /list\+$/xms) {

817

print $out_fh $parser->_current_indent(), "\n";

818

}

819

820

100

123

if ($state eq 'over') {

100

821

822

# first item

823

100

124

if ( !defined($paragraph)

100

824

|| $paragraph =~ /^\s*$/xms

825

|| $paragraph eq q{*})

826

{

827

print $out_fh join(q{},

828

$parser->_indent(), "\n", $parser->_indent(),

829

"\n", $parser->_indent(), "\n");

830

$state = 'list+';

831

}

832

elsif ($paragraph =~ /^([1aAiI])\.?$/xms) {

833

my $numeration = {

834

1 => 'arabic',

835

a => 'loweralpha',

836

A => 'upperalpha',

837

i => 'lowerroman',

838

I => 'upperroman'

839

}->{$1};

840

841

print $out_fh join(q{},

842

$parser->_indent(),

843

"\n",

844

$parser->_indent(),

845

qq!\n!,

846

$parser->_indent(),

847

"\n");

848

$state = 'olist+';

849

}

850

else {

851

my $id = $parser->make_uniq_id($paragraph);

852

print $out_fh join(q{},

853

$parser->_indent(),

854

"\n",

855

$parser->_indent(),

856

"\n",

857

$parser->_indent(),

858

"\n",

859

$parser->_current_indent(),

860

qq!$paragraph\n!,

861

$parser->_indent(),

862

qq!\n!);

863

$state = 'vlist+';

864

}

865

}

866

elsif ($state =~ /^o?list/xms) {

867

print $out_fh join(q{},

868

$parser->_outdent(), "\n",

869

$parser->_indent(), "\n");

870

$state = "$state+" unless $state =~ /\+$/xms;

871

}

872

elsif ($state =~ /^vlist/xms) {

873

my $id = $parser->make_uniq_id($paragraph);

874

print $out_fh join(q{},

875

$parser->_outdent(),

876

"\n",

877

$parser->_outdent(),

878

"\n",

879

$parser->_indent(),

880

"\n",

881

$parser->_current_indent(),

882

qq!$paragraph\n!,

883

$parser->_indent(),

884

"\n");

885

$state = 'vlist+';

886

}

887

else {

888

$parser->error_msg(

889

'=item must be inside an',

890

'=over ... =back region',

891

"at line $line_num in file",

892

$parser->input_file()

893

);

894

}

895

896

push @{ $parser->{'Pod::2::DocBook::state'} }, $state;

897

898

return;

899

}

900

901

sub _transition {

902

109

135

my ($parser, $what) = @_;

903

109

371

my $out_fh = $parser->output_handle();

904

109

116

my ($level);

905

906

# $level helps us determine what to do when we see =head

907

# 1-4 are the valid numbers after '=head', so 0 and 5

908

# are safe to use to mark out-of-bounds on either side

909

109

100

230

if ($what eq 'THE END') {

100

910

$level = 0;

911

}

912

elsif ($what =~ /^head(\d)/xms) {

913

$level = $1;

914

}

915

else {

916

$level = 5;

917

}

918

919

109

while (my $state = pop @{ $parser->{'Pod::2::DocBook::state'} }) {

122

418

920

100

489

if ( ($what eq 'item' || $what eq 'over')

921

&& ($state eq 'over' || $state =~ /^(o|v)?list/xms))

922

{

923

924

# these are treated specially in _handle_item ()

925

push @{ $parser->{'Pod::2::DocBook::state'} }, $state;

926

last;

927

}

928

929

if ($state =~ /list\+$/xms) {

930

print $out_fh $parser->_current_indent(), "\n";

931

$state =~ s/\+$//xms;

932

}

933

934

100

189

if ($state =~ /^head(\d)/xms) {

100

935

my $prev_level = $1;

936

937

100

if ($level > $prev_level) {

938

939

# embed in a previously opened section (i.e. restore

940

# state and continue processing the document)

941

942

# the enclosing section is no longer empty

943

$state =~ s/\+$//xms;

944

push @{ $parser->{'Pod::2::DocBook::state'} }, $state;

945

last;

946

}

947

else {

948

100

if ($state =~ /\+$/xms) {

949

950

# prevent empty sections

951

print $out_fh $parser->_current_indent(), "\n";

952

}

953

954

# close the previous section and continue with the stack

955

if ($parser->{doctype} eq 'refentry') {

956

print $out_fh $parser->_outdent(), "\n";

957

}

958

else {

959

print $out_fh $parser->_outdent(), "\n";

960

}

961

}

962

}

963

elsif ($state eq 'indent') {

964

print $out_fh $parser->_outdent(), "\n";

965

966

push @{ $parser->{'Pod::2::DocBook::state'} }, 'over'

967

if ($what eq 'item');

968

969

last if $what eq 'back';

970

}

971

elsif ($state eq 'list') {

972

print $out_fh join(q{},

973

$parser->_outdent(), "\n", $parser->_outdent(),

974

"\n", $parser->_outdent(), "\n");

975

976

last if $what eq 'back';

977

}

978

elsif ($state eq 'olist') {

979

print $out_fh join(q{},

980

$parser->_outdent(), "\n", $parser->_outdent(),

981

"\n", $parser->_outdent(), "\n");

982

983

last if $what eq 'back';

984

}

985

elsif ($state eq 'vlist') {

986

print $out_fh join(q{},

987

$parser->_outdent(), "\n", $parser->_outdent(),

988

"\n", $parser->_outdent(), "\n",

989

$parser->_outdent(), "\n");

990

991

last if $what eq 'back';

992

}

993

elsif ($state =~ /^synopsis/xms) {

994

print $out_fh join(q{},

995

$parser->_indent(), "\n",

996

$parser->_current_indent(), "\n")

997

if $state eq 'synopsis+';

998

999

print $out_fh $parser->_outdent(), "\n";

1000

}

1001

elsif ($state eq 'name') {

1002

print $out_fh join(q{},

1003

$parser->_indent(), "\n",

1004

$parser->_indent(), "\n",

1005

$parser->_current_indent(), "\n",

1006

$parser->_outdent(), "\n");

1007

}

1008

elsif ($state eq 'verbatim') {

1009

print $out_fh "]]>\n";

1010

}

1011

elsif ($state =~ /^begin\s(.+)/xms) {

1012

my $begin_format = $1;

1013

100

if ($what =~ /^end\s(.+)/xms) {

1014

my $end_format = $1;

1015

1016

100

if ($end_format eq $begin_format) {

1017

100

if ($end_format eq 'docbook') {

1018

my $paragraph =

1019

join(q{}, @{ $parser->{'Pod::2::DocBook::dbpara'} });

1020

$paragraph =~ s/\s+$//xms;

1021

print $out_fh $paragraph, "\n";

1022

$parser->{'Pod::2::DocBook::dbpara'} = [];

1023

}

1024

1025

last;

1026

}

1027

else {

1028

1029

# this is bad POD, but we do what we can

1030

# (maybe we'll find the begin we're looking for

1031

# deeper in the stack)

1032

$parser->error_msg(

1033

"`=end $end_format' found",

1034

'but current region opened with',

1035

"`=begin $begin_format'"

1036

);

1037

}

1038

}

1039

elsif ($what eq 'THE END') {

1040

1041

# this is bad POD, but we do what we can

1042

$parser->error_msg("no matching `=end' for",

1043

"`=begin $begin_format'");

1044

1045

# we've got the data stored; might as well use it

1046

if ($begin_format eq 'docbook') {

1047

my $paragraph =

1048

join(q{}, @{ $parser->{'Pod::2::DocBook::dbpara'} });

1049

$paragraph =~ s/\s+$//xms;

1050

print $out_fh $paragraph, "\n";

1051

$parser->{'Pod::2::DocBook::dbpara'} = [];

1052

}

1053

}

1054

else {

1055

push @{ $parser->{'Pod::2::DocBook::state'} }, $state;

1056

last;

1057

}

1058

}

1059

elsif ($state eq 'over') {

1060

next;

1061

}

1062

else {

1063

$parser->error_msg("encountered unknown state `$state'",

1064

'(this should never happen)');

1065

}

1066

}

1067

1068

109

178

return;

1069

}

1070

1071

sub _handle_table {

1072

my ($parser, $paragraph, $line_num) = @_;

1073

my $out_fh = $parser->output_handle();

1074

my (@rows, $columns, $title);

1075

1076

my $TABLE_ROW_TITLE = 0;

1077

my $TABLE_ROW_ALIGNMENTS = 1;

1078

my $TABLE_ROW_HEADER = 2;

1079

my $TABLE_FIRST_DATA_ROW = 3;

1080

1081

foreach my $row (split(/\n/xms, $paragraph)) {

1082

my @fields = quotewords(',', 0, $row);

1083

1084

100

1237

$columns = @fields

1085

if (!defined $columns || @fields > $columns);

1086

push @rows, [@fields];

1087

}

1088

1089

# the first row specifies the title

1090

$title = $rows[$TABLE_ROW_TITLE]->[0];

1091

1092

print $out_fh join(q{},

1093

$parser->_indent(), "\n", $parser->_current_indent(), \n", $parser->_indent(), "\n"); \n"); \n"; \n", $parser->_outdent(),

1094					"$title\n", $parser->_indent(),
1095					qq!\n!);
1096
1097					# the second row specifies column alignments
1098	2			4	foreach my $spec (@{ $rows[$TABLE_ROW_ALIGNMENTS] }) {
	2			3
1099	6			13	print $out_fh $parser->_current_indent(), '
1100
1101	6	100	13	27	if (any { $_ eq $spec } qw(left right center justify)) {
	13			25
1102	5			16	print $out_fh qq!align="$spec">\n!;
1103					}
1104					else {
1105	1			2	print $out_fh qq!align="left">\n!;
1106	1			11	$parser->error_msg(
1107					"unknown colspec `$spec' in table",
1108					$title, "at line $line_num in file",
1109					$parser->input_file()
1110					);
1111					}
1112					}
1113
1114					# the third row (first row of data) is the table header
1115	2			7	print $out_fh
1116					join(q{}, $parser->_indent(), "
1117
1118	2			5	foreach my $field (@{ $rows[$TABLE_ROW_HEADER] }) {
	2			4
1119	6			10	print $out_fh $parser->_current_indent(), "$field\n";
1120					}
1121
1122	2			8	print $out_fh join(q{},
1123					$parser->_outdent(), "\n", $parser->_outdent(), "
1124
1125					# the remaining rows are the table body
1126	2			7	print $out_fh $parser->_indent(), "
1127
1128	2			10	foreach my $row (@rows[ $TABLE_FIRST_DATA_ROW .. $#rows ]) {
1129	10			17	print $out_fh $parser->_indent(), "\n";
1130
1131	10			12	foreach my $field (@{$row}) {
	10			16
1132	30			50	print $out_fh $parser->_current_indent(), "$field\n";
1133					}
1134
1135	10			22	print $out_fh $parser->_outdent(), "\n";
1136					}
1137
1138	2			6	print $out_fh join(q{},
1139					$parser->_outdent(), "
1140					"\n", $parser->_outdent(), "

\n");

1141

1142

return;

1143

}

1144

1145

sub _manpage {

1146

my ($parser, $title, $volnum) = @_;

1147

1148

# the substring "\37632\377" is a space character protected

1149

# against translation in S<>; other characters are protected at

1150

# the end of interior_sequence (), and all protected characters

1151

# are de-protected in _fix_chars ()

1152

1153

100

my $manvol =

1154

$volnum

1155

? "\37632\377" x $parser->{spaces} . "$volnum"

1156

: q{};

1157

1158

return join "\n" => '',

1159

"\37632\377" x $parser->{spaces}

1160

. "$title",

1161

$manvol,

1162

'';

1163

}

1164

1165

#----------------------------------------------------------------------

1166

# helper functions

1167

#----------------------------------------------------------------------

1168

1169

sub _fix_chars {

1170

168

204

my ($paragraph) = @_;

1171

1172

# fix characters that might annoy an SGML parser

1173

168

221

$paragraph =~ s/&/&/gxms;

1174

168

164

$paragraph =~ s/

1175

168

190

$paragraph =~ s/>/>/gxms;

1176

1177

# finally, de-protect any characters that were protected

1178

# from the previous step

1179

168

219

$paragraph =~ s!\376(\d+)\377!pack ('C', $1)!egxms;

128

382

1180

1181

168

284

return $paragraph;

1182

}

1183

1184

=head2 cleanup_id($id_string)

1185

1186

This function is used internally to remove/change any illegal characters

1187

from the elements id string. (see http://www.w3.org/TR/2000/REC-xml-20001006#NT-Name

1188

for the id string specification)

1189

1190

$id_string =~ s//$1/g; # keep just inside of CDATA

1191

$id_string =~ s/<.+?>//g; # remove tags

1192

$id_string =~ s/^\s*//; # ltrim spaces

1193

$id_string =~ s/\s*$//; # rtrim spaces

1194

$id_string =~ tr{/ }{._}; # replace / with . and spaces with _

1195

$id_string =~ s/[^\-_a-zA-Z0-9\.: ]//g; # closed set of characters allowed in id string

1196

1197

In the worst case when the C<$id_string> after clean up will not conform with

1198

the specification, warning will be printed out and random number with leading colon

1199

will be used.

1200

1201

=cut

1202

1203

sub cleanup_id {

1204

my $parser = shift;

1205

my $id_string = shift;

1206

1207

$id_string =~ s//$1/gxms;# keep just inside of CDATA

1208

$id_string =~ s/<.+?>//gxms; # remove tags

1209

162

$id_string =~ s/^\s*//xms; # ltrim spaces

1210

284

$id_string =~ s/\s*$//xms; # rtrim spaces

1211

$id_string =~ tr{/ }{._}; # replace / with . and spaces with _

1212

113

$id_string =~ s/[^\-_a-zA-Z0-9\.:]//gxms; # closed set of characters allowed in id string

1213

$id_string =~ s/^[^A-Za-z_:]+//xms; # remove invalid leading characters

1214

135

$id_string =~ s/:/_/xmsg # remove : in ids version 2

1215

if $parser->{'id_version'} == 2;

1216

1217

# check if the id string is valid (SEE http://www.w3.org/TR/2000/REC-xml-20001006#NT-Name)

1218

# TODO refactor to the function, we will need if also later and some tests will be handfull

1219

# we should also "die" if the base_id is set through the command line parameter

1220

157

if ($id_string !~ m/^[A-Za-z_:] [-A-Za-z0-9_.:]*/xms) {

1221

$id_string = q{:}._big_random_number();

1222

warn 'wrong xml id string "', $id_string, '", throwing away and using ', $id_string, ' instead!', "\n";

1223

}

1224

1225

138

return $id_string;

1226

}

1227

1228

sub _big_random_number {

1229

## no critic ValuesAndExpressions::ProhibitMagicNumbers

1230

return int(rand(9e10)+10e10);

1231

## use critic

1232

}

1233

1234

1235

1236

__END__