File Coverage

blib/lib/LaTeX/Parser.pm
Criterion Covered Total %
statement 64 67 95.5
branch 16 22 72.7
condition 1 3 33.3
subroutine 6 6 100.0
pod 2 4 50.0
total 89 102 87.2


line stmt bran cond sub pod time code
1             package LaTeX::Parser;
2              
3             =head1 NAME
4              
5             LaTeX::Parser - Perl extension to parse LaTeX files
6              
7             =head1 SYNOPSIS
8              
9             use LaTeX::Parser;
10             my $l = new LaTeX::Parser 'file' => 'file.tex';
11             my $p = $l->latex; # $p now hold a reference to an array of
12             # file.tex parsed
13              
14             Or use it to break up LaTeX in a variable:
15              
16             my $l = new LaTeX::Parser 'content' =>
17             '\textit{Three Lives} by Gertrude Stein.';
18              
19             Contents of nested braces are extracted as a single element. Another
20             C will have to be created to parse nested braces.
21              
22             This is a very early version of C, there are many bugs.
23             I think this will work fine with plain TeX files but I do not plan on
24             ever support that.
25              
26             =head1 DESCRIPTION
27              
28             For now, only simple descriptions of the modules functions.
29              
30             =cut
31              
32 1     1   623 use strict;
  1         2  
  1         34  
33 1     1   900 use integer;
  1         10  
  1         6  
34              
35              
36             =over 4
37              
38             =item LaTeX::Parser->new %hash
39              
40             Creates a LaTeX::Parser object. All values in C<%hash> are initialize
41             to the values in the object. The only two tested values to set are
42             `C' and `C'. `C' is the name of the file to load
43             the LaTeX file from, and it get copied into `C'. If content
44             is set by then C<%hash> then `C' will never be called.
45              
46             =cut
47              
48             sub new {
49 1     1 1 47 my $this = shift;
50 1   33     8 my $class = ref($this) || $this;
51 1         4 my %hash = @_;
52 1         2 my $self = \%hash;
53 1         3 bless $self, $class;
54 1         3 return $self;
55             }
56              
57             # Function to be considered private that loads the LaTeX file and
58             # throws out comments.
59             sub load {
60 1     1 0 1 my $self = shift;
61              
62 1         3 $self->{'content'} = '';
63 1 50       37 open(FILE, $self->{'file'}) || die "Can't load `$self->{file}', $!\n";
64            
65             LINE:
66 1         27 while () {
67 26 100       43 if (m/^%/) {
68 1         4 next LINE;
69             }
70 25         25 s/%.*$//;
71 25         57 $self->{'content'} .= $_;
72             }
73 1         34 close(FILE);
74 1         3 return $self;
75             }
76              
77             =item LaTeX::Parser->latex
78              
79             No arguments. Actualy does all the work. Loads the LaTeX file if not
80             content was specified, and returns a reference to all parsed
81             information.
82              
83             =cut
84             sub latex {
85 1     1 1 5 my $self = shift;
86              
87 1 50       8 if (!defined $self->{'content'}) {
88 1         3 $self->load;
89             }
90              
91 1         3 my $content = $self->{'content'};
92            
93 1         3 do {
94 25 100       73 if ($content =~ m/^(.*?)([\\\{])/s) {
95 24         37 my $prematch = $1;
96 24         28 my $match = $2;
97              
98 24 100       41 if ($prematch ne '') {
99 11         11 push @{$self->{'parsed'}}, $prematch;
  11         19  
100 11         17 $prematch = quotemeta($prematch);
101 11         68 $content =~ s/^$prematch//s;
102             }
103            
104 24 100       56 if ($match eq '{') {
    50          
105 13         18 $match = &matching('{', '}', $content);
106 13         13 push @{$self->{'parsed'}}, $match;
  13         26  
107 13         19 $match = quotemeta($match);
108 13         150 $content =~ s/^$match//s;
109             } elsif ($match eq '\\') {
110 11 50       30 if ($content =~ m/^(\\[\w\\]+)/) {
    0          
111 11         16 $match = $1;
112             } elsif ($content =~ m/^(\\.)/) {
113 0         0 $match = $1;
114             } else {
115 0         0 die "A \\ Command I don't understand";
116             }
117 11         11 push @{$self->{'parsed'}}, $match;
  11         21  
118 11         16 $match = quotemeta($match);
119 11         109 $content =~ s/^$match//s;
120             } else {
121 0         0 die "Found `$match' where only `{' of `\\' should be";
122             }
123              
124             } else {
125 1         2 push @{$self->{'parsed'}}, $content;
  1         2  
126 1         3 $content = '';
127             }
128             } while ($content ne '');
129 1         4 return $self->{'parsed'};
130             }
131              
132              
133              
134             ##############
135              
136             # Just a little utility program to match nested, single character
137             # delimited quotes. Should make it so one can backslach the
138             # delimiter.
139              
140             sub matching {
141 13     13 0 14 my $begin = shift;
142 13         15 my $end = shift;
143 13         13 my $text = shift;
144              
145 13         14 my $loop = 1;
146 13         14 my $deep = 1;
147              
148 13         25 until ($deep == 0) {
149 272         279 my $c = substr($text, $loop, 1);
150 272 100       418 if ($c eq $begin) {
151 2         3 $deep++;
152             }
153 272 100       381 if ($c eq $end) {
154 15         14 $deep--;
155             }
156 272         439 $loop++;
157             }
158 13         29 return substr($text, 0, $loop);
159             }
160              
161             1;
162             __END__