line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Pod::Extract::URI; |
2
|
|
|
|
|
|
|
|
3
|
11
|
|
|
11
|
|
692850
|
use strict; |
|
11
|
|
|
|
|
35
|
|
|
11
|
|
|
|
|
1642
|
|
4
|
11
|
|
|
11
|
|
71
|
use warnings; |
|
11
|
|
|
|
|
22
|
|
|
11
|
|
|
|
|
356
|
|
5
|
11
|
|
|
11
|
|
66
|
use Carp; |
|
11
|
|
|
|
|
27
|
|
|
11
|
|
|
|
|
1511
|
|
6
|
11
|
|
|
11
|
|
63240
|
use URI::Find; |
|
11
|
|
|
|
|
342813
|
|
|
11
|
|
|
|
|
934
|
|
7
|
11
|
|
|
11
|
|
86055
|
use URI::Find::Schemeless; |
|
11
|
|
|
|
|
854213
|
|
|
11
|
|
|
|
|
2672
|
|
8
|
11
|
|
|
11
|
|
24681
|
use Pod::Escapes; |
|
11
|
|
|
|
|
74601
|
|
|
11
|
|
|
|
|
1239
|
|
9
|
|
|
|
|
|
|
|
10
|
11
|
|
|
11
|
|
123
|
use base qw(Pod::Parser); |
|
11
|
|
|
|
|
24
|
|
|
11
|
|
|
|
|
59041
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our $VERSION = '0.3'; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=pod |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=begin comment |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
General approach: |
19
|
|
|
|
|
|
|
* create a Pod::Parser subclass which has, amongst other things, |
20
|
|
|
|
|
|
|
a reference to a URI::Find object |
21
|
|
|
|
|
|
|
* set up handlers for various POD events |
22
|
|
|
|
|
|
|
* have those handlers call _process() method on their text |
23
|
|
|
|
|
|
|
if we want their URIs |
24
|
|
|
|
|
|
|
* the finder object calls _register_uri() method when it finds |
25
|
|
|
|
|
|
|
URIs, which we stash in the Pod::Extract::URI object to return |
26
|
|
|
|
|
|
|
after parsing |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=end comment |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 NAME |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
Pod::Extract::URI - Extract URIs from POD |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=head1 SYNOPSIS |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
use Pod::Extract::URI; |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
# Get a list of URIs from a file |
41
|
|
|
|
|
|
|
my @uris = Pod::Extract::URI->uris_from_file( $file ); |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
# Or filehandle |
44
|
|
|
|
|
|
|
my @uris = Pod::Extract::URI->uris_from_filehandle( $filehandle ); |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# Or the full OO |
47
|
|
|
|
|
|
|
my $parser = Pod::Extract::URI->new(); |
48
|
|
|
|
|
|
|
$parser->parse_from_file( $file ); |
49
|
|
|
|
|
|
|
my @uris = $parser->uris(); |
50
|
|
|
|
|
|
|
my %uri_details = $parser->uri_details(); |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=head1 DESCRIPTION |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
This module parses POD and uses C or C |
56
|
|
|
|
|
|
|
to extract any URIs it can. |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head1 METHODS |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=over 4 |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=item new() |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
Create a new C object. |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
C takes an optional hash of options, whose names correspond to |
68
|
|
|
|
|
|
|
object methods described in more detail below. |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=over 4 |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=item schemeless (boolean, default 0) |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
Should the parser try to extract schemeless URIs (using C)? |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=item L_only (boolean, default 0) |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
Should the parser only look for URIs in LEE sequences? |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=item textblock (boolean, default 1) |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=item verbatim (boolean, default 1) |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=item command (boolean, default 1) |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
Should the parser look in POD text paragraph, verbatim blocks, or commands? |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=item schemes (arrayref) |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
Restrict URIs to the schemes in the arrayref. |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=item exclude_schemes (arrayref) |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
Exclude URIs with the schemes in the arrayref. |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=item stop_uris (arrayref) |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
An arrayref of patterns to ignore. |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=item stop_sub (coderef) |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
A reference to a subroutine to run for each URI to see if the URI should |
103
|
|
|
|
|
|
|
be ignored. |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=item use_canonical (boolean, default 0) |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
Convert the URIs found to their canonical form. |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=item strip_brackets (boolean, default 1) |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
Strip extra brackets which may appear around the URL returned by L. |
112
|
|
|
|
|
|
|
See method below for more details. |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=back |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=back |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=cut |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
sub new { |
121
|
29
|
|
|
29
|
1
|
23966
|
my $proto = shift; |
122
|
29
|
|
33
|
|
|
231
|
my $class = ref $proto || $proto; |
123
|
|
|
|
|
|
|
|
124
|
29
|
|
|
|
|
173
|
my %args = @_; |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# default arguments |
127
|
|
|
|
|
|
|
my %my_args = ( |
128
|
|
|
|
|
|
|
schemeless => 0, |
129
|
|
|
|
|
|
|
L_only => 0, |
130
|
|
|
|
|
|
|
want_textblock => 1, |
131
|
|
|
|
|
|
|
want_verbatim => 1, |
132
|
|
|
|
|
|
|
want_command => 1, |
133
|
|
|
|
|
|
|
schemes => [], |
134
|
|
|
|
|
|
|
exclude_schemes => [], |
135
|
|
|
|
|
|
|
stop_uris => [], |
136
|
87
|
|
|
87
|
|
273
|
stop_sub => sub { return 0 }, |
137
|
29
|
|
|
|
|
486
|
use_canonical => 0, |
138
|
|
|
|
|
|
|
strip_brackets => 1, |
139
|
|
|
|
|
|
|
); |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# override defaults |
142
|
29
|
|
|
|
|
173
|
for my $arg ( keys %my_args ) { |
143
|
319
|
100
|
|
|
|
856
|
if ( exists $args{ $arg } ) { |
144
|
54
|
|
|
|
|
92
|
$my_args{ $arg } = $args{ $arg }; |
145
|
|
|
|
|
|
|
# remove arguments - anything left will be passed |
146
|
|
|
|
|
|
|
# to Pod::Parser |
147
|
54
|
|
|
|
|
116
|
delete $args{ $arg }; |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
# instantiate Pod::Parser object |
152
|
|
|
|
|
|
|
# pass any leftover arguments |
153
|
29
|
|
|
|
|
660
|
my $self = $class->SUPER::new( %args ); |
154
|
|
|
|
|
|
|
|
155
|
29
|
|
|
|
|
184
|
$self->{ URIS } = {}; # URI details |
156
|
29
|
|
|
|
|
84
|
$self->{ URI_LIST } = []; # ordered URI list |
157
|
|
|
|
|
|
|
|
158
|
29
|
|
|
|
|
62
|
my $find_class = "URI::Find"; |
159
|
29
|
100
|
|
|
|
113
|
if ( $my_args{ schemeless } ) { |
160
|
4
|
|
|
|
|
11
|
$find_class = "URI::Find::Schemeless"; |
161
|
|
|
|
|
|
|
} |
162
|
29
|
|
|
|
|
78
|
delete $my_args{ schemeless }; # no schemeless() method |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
# instantiate finder object with callback closure |
165
|
|
|
|
|
|
|
my $finder = $find_class->new( sub { |
166
|
104
|
|
|
104
|
|
308255
|
$self->_register_uri( @_ ); |
167
|
29
|
|
|
|
|
375
|
} ); |
168
|
29
|
|
|
|
|
535
|
$self->_finder( $finder ); |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
# call methods for remaining arguments |
171
|
29
|
|
|
|
|
107
|
for my $arg ( keys %my_args ) { |
172
|
290
|
|
|
|
|
987
|
$self->$arg( $my_args{ $arg } ); |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
|
175
|
29
|
|
|
|
|
179
|
return $self; |
176
|
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
# process |
179
|
|
|
|
|
|
|
# Use the URI::Find object to find URIs. The URI::Find object has a callback |
180
|
|
|
|
|
|
|
# which will record any URIs it finds |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
sub _process { |
183
|
150
|
|
|
150
|
|
264
|
my ( $self, $text ) = @_; |
184
|
150
|
|
|
|
|
343
|
$self->_finder->find( \$text ); |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
# textblock |
188
|
|
|
|
|
|
|
# Overrides Pod::Parser method, handling POD textblock events |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
sub textblock { |
191
|
134
|
|
|
134
|
1
|
17598
|
my ( $self, $text, $line, $para ) = @_; |
192
|
134
|
|
|
|
|
325
|
$self->_current_line( $line, $para ); # stash current line info for callback |
193
|
134
|
100
|
|
|
|
336
|
if ( $self->want_textblock() ) { |
194
|
|
|
|
|
|
|
# interpolate to get interior sequence expansion |
195
|
84
|
|
|
|
|
7197
|
$text = $self->interpolate( $text, $line ); |
196
|
84
|
100
|
|
|
|
236
|
if ( ! $self->L_only ) { |
197
|
|
|
|
|
|
|
# interpolate() will sort out extraction for L<> if L_only is true |
198
|
73
|
|
|
|
|
226
|
$self->_process( $text, $line ); |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
# verbatim |
204
|
|
|
|
|
|
|
# Overrides Pod::Parser method, handling POD verbatim events |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
sub verbatim { |
207
|
33
|
|
|
33
|
1
|
11395
|
my ( $self, $text, $line, $para ) = @_; |
208
|
33
|
|
|
|
|
89
|
$self->_current_line( $line, $para ); |
209
|
33
|
100
|
66
|
|
|
100
|
if ( $self->want_verbatim() && ! $self->L_only() ) { |
210
|
|
|
|
|
|
|
# L<> not valid in verbatim blocks |
211
|
15
|
|
|
|
|
39
|
$self->_process( $text ); |
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
# command |
216
|
|
|
|
|
|
|
# Overrides Pod::Parser method, handling POD command events |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
sub command { |
219
|
103
|
|
|
103
|
1
|
10917
|
my ( $self, $cmd, $text, $line, $para ) = @_; |
220
|
103
|
|
|
|
|
267
|
$self->_current_line( $line, $para ); |
221
|
103
|
100
|
66
|
|
|
469
|
if ( $cmd eq "for" && index( $text, "stop_uris" ) == 0 ) { |
|
|
100
|
|
|
|
|
|
222
|
|
|
|
|
|
|
# We have a stop_uris directive - add them to the |
223
|
|
|
|
|
|
|
# list |
224
|
1
|
|
|
|
|
2
|
my @stop = @{ $self->stop_uris }; |
|
1
|
|
|
|
|
4
|
|
225
|
1
|
|
|
|
|
3
|
$text = substr( $text, 10 ); |
226
|
1
|
|
|
|
|
5
|
push @stop, split /\n/, $text; |
227
|
1
|
|
|
|
|
4
|
$self->stop_uris( \@stop ); |
228
|
|
|
|
|
|
|
} elsif ( $self->want_command() ) { |
229
|
|
|
|
|
|
|
# same logic as for textblock() |
230
|
62
|
|
|
|
|
6548
|
$self->interpolate( $text, $line ); |
231
|
62
|
100
|
|
|
|
163
|
if ( ! $self->L_only() ) { |
232
|
53
|
|
|
|
|
141
|
$self->_process( $text ); |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
} |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
# interior_sequence |
238
|
|
|
|
|
|
|
# Overrides Pod::Parser method, handling POD interior_sequence events |
239
|
|
|
|
|
|
|
# Only gets called if we call interpolate() on the containing paragraph |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
sub interior_sequence { |
242
|
68
|
|
|
68
|
0
|
837
|
my ( $self, $seq_cmd, $seq_arg, $pod_seq ) = @_; |
243
|
68
|
100
|
100
|
|
|
322
|
if ( $seq_cmd eq "L" && $self->L_only ) { |
|
|
100
|
|
|
|
|
|
244
|
|
|
|
|
|
|
# if we have an L<> sequence, process it |
245
|
9
|
|
|
|
|
31
|
$self->_process( $seq_arg ); |
246
|
|
|
|
|
|
|
} elsif ( $seq_cmd eq "E" ) { |
247
|
6
|
|
|
|
|
24
|
return Pod::Escapes::e2char( $seq_arg ); |
248
|
|
|
|
|
|
|
} |
249
|
62
|
|
|
|
|
8225
|
return $seq_arg; |
250
|
|
|
|
|
|
|
} |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
# _register_uri |
253
|
|
|
|
|
|
|
# Handle a URI when we find it |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
sub _register_uri { |
256
|
104
|
|
|
104
|
|
229
|
my ( $self, $uri, $original_text ) = @_; |
257
|
|
|
|
|
|
|
|
258
|
104
|
|
|
|
|
158
|
my $text = $original_text; |
259
|
104
|
100
|
|
|
|
300
|
if ( $self->strip_brackets ) { |
260
|
100
|
|
|
|
|
222
|
$text =~ s/^<(URL:)?(.*)>$/$2/; |
261
|
|
|
|
|
|
|
} |
262
|
104
|
|
|
|
|
178
|
my $test_text = $text; |
263
|
104
|
|
|
|
|
146
|
my $uri_str = $text; |
264
|
104
|
100
|
|
|
|
472
|
if ( $self->use_canonical ) { |
265
|
|
|
|
|
|
|
# force to canonical form |
266
|
6
|
|
|
|
|
25
|
$uri = $uri->canonical; # looks like URI::Find already does this |
267
|
6
|
|
|
|
|
1018
|
$uri_str = $uri->as_string; |
268
|
6
|
|
|
|
|
30
|
$test_text = $uri_str; |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
|
271
|
104
|
|
|
|
|
631
|
my $scheme = $uri->scheme(); |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
# check the scheme and URL against the various discriminators |
274
|
|
|
|
|
|
|
|
275
|
104
|
|
|
|
|
2108
|
my $include = $self->schemes; |
276
|
104
|
100
|
100
|
|
|
346
|
if ( scalar @$include && ! grep { $scheme eq $_ } @$include ) { |
|
16
|
|
|
|
|
63
|
|
277
|
4
|
|
|
|
|
16
|
return $text; |
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
|
280
|
100
|
|
|
|
|
255
|
my $exclude = $self->exclude_schemes; |
281
|
100
|
100
|
100
|
|
|
333
|
if ( scalar @$exclude && grep { $scheme eq $_ } @$exclude ) { |
|
12
|
|
|
|
|
43
|
|
282
|
3
|
|
|
|
|
12
|
return $text; |
283
|
|
|
|
|
|
|
} |
284
|
|
|
|
|
|
|
|
285
|
97
|
|
|
|
|
252
|
my $stop = $self->stop_uris; |
286
|
97
|
100
|
100
|
|
|
340
|
if ( scalar @$stop && grep { $test_text =~ $_ } @$stop ) { |
|
11
|
|
|
|
|
81
|
|
287
|
7
|
|
|
|
|
27
|
return $text; |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
|
290
|
90
|
100
|
|
|
|
253
|
if ( $self->_check_stop_sub( $uri, $text ) ) { |
291
|
3
|
|
|
|
|
6592
|
return $text; |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
|
294
|
87
|
|
|
|
|
272
|
my ( $line, $para ) = $self->_current_line(); |
295
|
|
|
|
|
|
|
|
296
|
87
|
100
|
|
|
|
312
|
if ( ! exists $self->{ URIS }->{ $uri_str } ) { |
297
|
83
|
|
|
|
|
290
|
$self->{ URIS }->{ $uri_str } = []; |
298
|
|
|
|
|
|
|
} |
299
|
87
|
|
|
|
|
124
|
push @{ $self->{ URIS }->{ $uri_str } }, { |
|
87
|
|
|
|
|
617
|
|
300
|
|
|
|
|
|
|
uri => $uri, |
301
|
|
|
|
|
|
|
text => $text, |
302
|
|
|
|
|
|
|
original_text => $original_text, |
303
|
|
|
|
|
|
|
line => $line, |
304
|
|
|
|
|
|
|
para => $para, |
305
|
|
|
|
|
|
|
}; |
306
|
87
|
|
|
|
|
137
|
push @{ $self->{ URI_LIST } }, $uri_str; |
|
87
|
|
|
|
|
191
|
|
307
|
87
|
|
|
|
|
332
|
return $text; |
308
|
|
|
|
|
|
|
} |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
# _current_line |
311
|
|
|
|
|
|
|
# Store the current line and Pod::Paragraph object, as passed to the |
312
|
|
|
|
|
|
|
# Pod::Parser methods, so that _register_uri() can store them if |
313
|
|
|
|
|
|
|
# necessary. |
314
|
|
|
|
|
|
|
# Returns the current line in scalar context, and the current line and |
315
|
|
|
|
|
|
|
# Pod::Paragraph object in list context. |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
sub _current_line { |
318
|
357
|
|
|
357
|
|
805
|
my ( $self, $line, $para ) = @_; |
319
|
357
|
100
|
|
|
|
1057
|
if ( defined $line ) { |
320
|
270
|
|
|
|
|
533
|
$self->{ CURRENT_LINE } = $line; |
321
|
270
|
50
|
|
|
|
599
|
if ( defined $para ) { |
322
|
270
|
|
|
|
|
487
|
$self->{ CURRENT_PARA } = $para; |
323
|
|
|
|
|
|
|
} else { |
324
|
0
|
|
|
|
|
0
|
delete $self->{ CURRENT_PARA }; |
325
|
|
|
|
|
|
|
} |
326
|
|
|
|
|
|
|
} |
327
|
357
|
100
|
|
|
|
6788
|
if ( wantarray ) { |
328
|
87
|
|
|
|
|
271
|
return ( $self->{ CURRENT_LINE }, $self->{ CURRENT_PARA } ); |
329
|
|
|
|
|
|
|
} else { |
330
|
270
|
|
|
|
|
589
|
return $self->{ CURRENT_LINE }; |
331
|
|
|
|
|
|
|
} |
332
|
|
|
|
|
|
|
} |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
# _finder |
335
|
|
|
|
|
|
|
# Get/set the URI finder object |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
sub _finder { |
338
|
181
|
|
|
181
|
|
289
|
my ( $self, $finder ) = @_; |
339
|
181
|
100
|
|
|
|
453
|
if ( defined $finder ) { |
340
|
29
|
|
|
|
|
74
|
$self->{ FINDER } = $finder; |
341
|
|
|
|
|
|
|
} |
342
|
181
|
|
|
|
|
821
|
return $self->{ FINDER }; |
343
|
|
|
|
|
|
|
} |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
=head2 L_only() |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
Get/set the L_only flag. Takes one optional true/false argument to |
348
|
|
|
|
|
|
|
set the L_only flag. Defaults to false. |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
If true, C will look for URIs only in CE> |
351
|
|
|
|
|
|
|
sequences, otherwise it will look anywhere in the POD. |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
=cut |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
sub L_only { |
356
|
244
|
|
|
244
|
1
|
441
|
my ( $self, $l_only ) = @_; |
357
|
244
|
100
|
|
|
|
593
|
if ( defined $l_only ) { |
358
|
30
|
|
|
|
|
78
|
$self->{ L_ONLY } = $l_only; |
359
|
|
|
|
|
|
|
} |
360
|
244
|
|
|
|
|
2384
|
return $self->{ L_ONLY }; |
361
|
|
|
|
|
|
|
} |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
=head2 want_command() |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
Get/set the want_command flag. Takes one optional true/false argument to |
366
|
|
|
|
|
|
|
set the want_command flag. Defaults to true. |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
If true, C will look for URIs in command blocks (i.e. |
369
|
|
|
|
|
|
|
C<=head1>, etc.). |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
=cut |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
sub want_command { |
374
|
135
|
|
|
135
|
1
|
215
|
my ( $self, $command ) = @_; |
375
|
135
|
100
|
|
|
|
321
|
if ( defined $command ) { |
376
|
30
|
|
|
|
|
82
|
$self->{ WANT_COMMAND } = $command; |
377
|
|
|
|
|
|
|
} |
378
|
135
|
|
|
|
|
2269
|
return $self->{ WANT_COMMAND }; |
379
|
|
|
|
|
|
|
} |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
=head2 want_textblock() |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
Get/set the want_textblock flag. Takes one optional true/false argument to |
384
|
|
|
|
|
|
|
set the want_textblock flag. Defaults to true. |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
If true, C will look for URIs in textblocks (i.e. |
387
|
|
|
|
|
|
|
paragraphs). |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
=cut |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
sub want_textblock { |
392
|
167
|
|
|
167
|
1
|
264
|
my ( $self, $textblock ) = @_; |
393
|
167
|
100
|
|
|
|
500
|
if ( defined $textblock ) { |
394
|
30
|
|
|
|
|
144
|
$self->{ WANT_TEXTBLOCK } = $textblock; |
395
|
|
|
|
|
|
|
} |
396
|
167
|
|
|
|
|
4383
|
return $self->{ WANT_TEXTBLOCK }; |
397
|
|
|
|
|
|
|
} |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
=head2 want_verbatim() |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
Get/set the want_verbatim flag. Takes one optional true/false argument to |
402
|
|
|
|
|
|
|
set the want_verbatim flag. Defaults to true. |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
If true, C will look for URIs in verbatim blocks (i.e. |
405
|
|
|
|
|
|
|
code examples, etc.). |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
=cut |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
sub want_verbatim { |
410
|
66
|
|
|
66
|
1
|
133
|
my ( $self, $verbatim ) = @_; |
411
|
66
|
100
|
|
|
|
318
|
if ( defined $verbatim ) { |
412
|
30
|
|
|
|
|
65
|
$self->{ WANT_VERBATIM } = $verbatim; |
413
|
|
|
|
|
|
|
} |
414
|
66
|
|
|
|
|
1157
|
return $self->{ WANT_VERBATIM }; |
415
|
|
|
|
|
|
|
} |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=head2 schemes() |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
$peu->schemes( [ 'http', 'ftp' ] ); |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
Get/set the list of schemes to search for. Takes an optional arrayref of |
422
|
|
|
|
|
|
|
schemes to set. |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
If there are no schemes, C will look for all schemes. |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
=cut |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
sub schemes { |
429
|
138
|
|
|
138
|
1
|
247
|
my ( $self, $schemes ) = @_; |
430
|
138
|
100
|
|
|
|
374
|
if ( defined $schemes ) { |
431
|
31
|
100
|
|
|
|
112
|
if ( ref $schemes eq "ARRAY" ) { |
432
|
30
|
|
|
|
|
103
|
$self->{ SCHEMES } = $schemes; |
433
|
|
|
|
|
|
|
} else { |
434
|
1
|
|
|
|
|
234
|
carp "Argument to schemes() must be an arrayref"; |
435
|
|
|
|
|
|
|
} |
436
|
|
|
|
|
|
|
} |
437
|
138
|
|
|
|
|
419
|
return $self->{ SCHEMES }; |
438
|
|
|
|
|
|
|
} |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
=head2 exclude_schemes() |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
$peu->exclude_schemes( [ 'mailto', 'https' ] ); |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
Get/set the list of schemes to ignore. Takes an optional arrayref of |
445
|
|
|
|
|
|
|
schemes to set. |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
=cut |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
sub exclude_schemes { |
450
|
134
|
|
|
134
|
1
|
252
|
my ( $self, $schemes ) = @_; |
451
|
134
|
100
|
|
|
|
329
|
if ( defined $schemes ) { |
452
|
31
|
100
|
|
|
|
129
|
if ( ref $schemes eq "ARRAY" ) { |
453
|
30
|
|
|
|
|
96
|
$self->{ EXCLUDE_SCHEMES } = $schemes; |
454
|
|
|
|
|
|
|
} else { |
455
|
1
|
|
|
|
|
150
|
carp "Argument to exclude_schemes() must be an arrayref"; |
456
|
|
|
|
|
|
|
} |
457
|
|
|
|
|
|
|
} |
458
|
134
|
|
|
|
|
599
|
return $self->{ EXCLUDE_SCHEMES }; |
459
|
|
|
|
|
|
|
} |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=head2 stop_uris() |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
$peu->stop_uris( [ |
464
|
|
|
|
|
|
|
qr/example\.com/, |
465
|
|
|
|
|
|
|
'foobar.com' |
466
|
|
|
|
|
|
|
] ); |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
Get/set a list of patterns to apply to each URI to see if it should be |
469
|
|
|
|
|
|
|
ignored. Takes an optional arrayref of patterns to set. Strings in the list |
470
|
|
|
|
|
|
|
will be automatically converted to patterns (using qr//). |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
The URIs will be checked against the canonical URI form if C |
473
|
|
|
|
|
|
|
has been specified. Otherwise, they will be checked against the URI as it |
474
|
|
|
|
|
|
|
appears in the POD. If C is specified, the brackets (and |
475
|
|
|
|
|
|
|
"URL:" prefix, if present) will be removed before testing. |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
Any URI that matches a pattern will be ignored. |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
=cut |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
sub stop_uris { |
482
|
133
|
|
|
133
|
1
|
221
|
my ( $self, $urls ) = @_; |
483
|
133
|
100
|
|
|
|
366
|
if ( defined $urls ) { |
484
|
32
|
100
|
|
|
|
134
|
if ( ref $urls eq "ARRAY" ) { |
485
|
31
|
100
|
|
|
|
79
|
my @urls = map { UNIVERSAL::isa( $_, "Regexp" ) ? $_ : qr/$_/ } @$urls; |
|
8
|
|
|
|
|
239
|
|
486
|
31
|
|
|
|
|
104
|
$self->{ STOP_URLS } = \@urls; |
487
|
|
|
|
|
|
|
} else { |
488
|
1
|
|
|
|
|
152
|
carp "Argument to stop_uris() must be an arrayref"; |
489
|
|
|
|
|
|
|
} |
490
|
|
|
|
|
|
|
} |
491
|
133
|
|
|
|
|
404
|
return $self->{ STOP_URLS }; |
492
|
|
|
|
|
|
|
} |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
=head2 stop_sub() |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
sub exclude { |
497
|
|
|
|
|
|
|
my $uri = shift; |
498
|
|
|
|
|
|
|
return ( $uri->host =~ /example\.com/ ) ? 1 : 0; |
499
|
|
|
|
|
|
|
} |
500
|
|
|
|
|
|
|
$peu->stop_sub( \&exclude ); |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
Get/set a subroutine to check each URI found to see if it should be ignored. |
503
|
|
|
|
|
|
|
Takes an optional coderef to set. |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
The subroutine will be passed a reference to the C object, the text found |
506
|
|
|
|
|
|
|
by C, and a reference to the C object. If it |
507
|
|
|
|
|
|
|
returns true, the URI will be ignored. |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
=cut |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
sub stop_sub { |
512
|
31
|
|
|
31
|
1
|
526
|
my ( $self, $sub ) = @_; |
513
|
31
|
50
|
|
|
|
115
|
if ( defined $sub ) { |
514
|
31
|
100
|
|
|
|
108
|
if ( ref $sub eq "CODE" ) { |
515
|
30
|
|
|
|
|
94
|
$self->{ STOP_SUB } = $sub; |
516
|
|
|
|
|
|
|
} else { |
517
|
1
|
|
|
|
|
130
|
carp "Argument to stop_sub() must be a coderef"; |
518
|
|
|
|
|
|
|
} |
519
|
|
|
|
|
|
|
} |
520
|
31
|
|
|
|
|
209
|
return $self->{ STOP_SUB }; |
521
|
|
|
|
|
|
|
} |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
# _check_stop_sub |
524
|
|
|
|
|
|
|
# Call the stop sub with the right arguments |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
sub _check_stop_sub { |
527
|
93
|
|
|
93
|
|
302
|
my ( $self, $uri, $text ) = @_; |
528
|
93
|
|
|
|
|
165
|
my $sub = $self->{ STOP_SUB }; |
529
|
93
|
|
|
|
|
237
|
return &$sub( $uri, $text, $self ); |
530
|
|
|
|
|
|
|
} |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
=head2 use_canonical() |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
Get/set the use_canonical flag. Takes one optional true/false argument to |
535
|
|
|
|
|
|
|
set the use_canonical flag. Defaults to false. |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
If true, C will store the URIs it finds in the canonical |
538
|
|
|
|
|
|
|
form (as returned by Ccanonical()>. The original URI and text will |
539
|
|
|
|
|
|
|
still be available via C. |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
=cut |
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
sub use_canonical { |
544
|
137
|
|
|
137
|
1
|
1504
|
my ( $self, $use ) = @_; |
545
|
137
|
100
|
|
|
|
477
|
if ( defined $use ) { |
546
|
30
|
|
|
|
|
91
|
$self->{ USE_CANONICAL } = $use; |
547
|
|
|
|
|
|
|
} |
548
|
137
|
|
|
|
|
437
|
return $self->{ USE_CANONICAL }; |
549
|
|
|
|
|
|
|
} |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
=head2 strip_brackets() |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
Get/set the strip_brackets flag. Takes one optional true/false argument to |
554
|
|
|
|
|
|
|
set the strip_brackets flag. Defaults to true. |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
RFC 2396 Appendix E suggests the form Chttp://www.example.com/E> |
557
|
|
|
|
|
|
|
or CURL:http://www.example.com/E> when embedding URLs in plain text. |
558
|
|
|
|
|
|
|
C includes these in the URLs it returns. If C is |
559
|
|
|
|
|
|
|
true, this extra stuff will be removed and won't appear in the URIs returned |
560
|
|
|
|
|
|
|
by C. |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
=cut |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
sub strip_brackets { |
565
|
137
|
|
|
137
|
1
|
337
|
my ( $self, $strip ) = @_; |
566
|
137
|
100
|
|
|
|
425
|
if ( defined $strip ) { |
567
|
30
|
|
|
|
|
199
|
$self->{ STRIP_BRACKETS } = $strip; |
568
|
|
|
|
|
|
|
} |
569
|
137
|
|
|
|
|
473
|
return $self->{ STRIP_BRACKETS }; |
570
|
|
|
|
|
|
|
} |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
=head2 parse_from_file() |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
$peu->parse_from_file( $filename ); |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
Parses the POD from the specified file and stores the URIs it finds for later |
577
|
|
|
|
|
|
|
retrieval. |
578
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
=head2 parse_from_filehandle() |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
$peu->parse_from_filehandle( $filehandle ); |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
Parses the POD from the filehandle and stores the URIs it finds for later |
584
|
|
|
|
|
|
|
retrieval. |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
=head2 uris_from_file() |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
my @uris = $peu->uris_from_file( $filename ); |
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
A shortcut for C then C. |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
=cut |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
sub uris_from_file { |
595
|
19
|
|
|
19
|
1
|
207
|
my ( $self, $file ) = @_; |
596
|
19
|
100
|
|
|
|
80
|
if ( ! ref $self ) { |
597
|
2
|
|
|
|
|
12
|
$self = $self->new(); |
598
|
|
|
|
|
|
|
} |
599
|
19
|
|
|
|
|
5664
|
$self->parse_from_file( $file ); |
600
|
19
|
|
|
|
|
2012
|
return $self->uris; |
601
|
|
|
|
|
|
|
} |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
=head2 uris_from_filehandle() |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
my @uris = $peu->uris_from_filehandle( $filename ); |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
A shortcut for C then C. |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
=cut |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
sub uris_from_filehandle { |
612
|
1
|
|
|
1
|
1
|
1001
|
my ( $self, $file ) = @_; |
613
|
1
|
50
|
|
|
|
6
|
if ( ! ref $self ) { |
614
|
1
|
|
|
|
|
5
|
$self = $self->new(); |
615
|
|
|
|
|
|
|
} |
616
|
1
|
|
|
|
|
154
|
$self->parse_from_filehandle( $file ); |
617
|
1
|
|
|
|
|
111
|
return @{ $self->{ URI_LIST } }; |
|
1
|
|
|
|
|
12
|
|
618
|
|
|
|
|
|
|
} |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
=head2 uris() |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
my @uris = $peu->uris(); |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
Returns a list of the URIs found from parsing. |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
=cut |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
sub uris { |
629
|
20
|
|
|
20
|
1
|
266
|
my $self = shift; |
630
|
20
|
|
|
|
|
31
|
return @{ $self->{ URI_LIST } }; |
|
20
|
|
|
|
|
164
|
|
631
|
|
|
|
|
|
|
} |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
=head2 uri_details() |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
my %details = $peu->uri_details(); |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
Returns a hash of data about the URIs found. |
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
The keys of the hash are the URIs (which match those returned by C). |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
The values of the hash are arrayrefs of hashrefs. Each hashref contains |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
=over 4 |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
=item uri |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
The URI object returned by C. |
648
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
=item text |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
The text returned by C, which will have the brackets stripped |
652
|
|
|
|
|
|
|
from it if C has been specified. |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
=item original_text |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
The original text returned by C. |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
=item line |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
The initial line number of the paragraph in which the URI was found. |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
=item para |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
The C object corresponding to the paragraph where the URI |
665
|
|
|
|
|
|
|
was found. |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
=back |
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
=cut |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
sub uri_details { |
672
|
4
|
|
|
4
|
1
|
1717
|
my $self = shift; |
673
|
4
|
|
|
|
|
8
|
return %{ $self->{ URIS } }; |
|
4
|
|
|
|
|
36
|
|
674
|
|
|
|
|
|
|
} |
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
=head1 STOP URIS |
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
You can specify URIs to ignore in your POD, using a C<=for stop_uris> |
679
|
|
|
|
|
|
|
command, e.g. |
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
=for stop_uris www.foobar.com |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
These will be converted to patterns as if they had been passed in via |
684
|
|
|
|
|
|
|
C directly, and will apply from the point of the command |
685
|
|
|
|
|
|
|
onwards. |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
=head1 AUTHOR |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
Ian Malpass (ian-cpan@indecorous.com) |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
=head1 COPYRIGHT |
694
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
Copyright 2007, Ian Malpass |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify it |
698
|
|
|
|
|
|
|
under the same terms as Perl itself. |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
=head1 SEE ALSO |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
L, L, L. |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
=cut |
706
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
1; |