File Coverage

blib/lib/URI/Find/Delimited.pm

Criterion	Covered	Total	%
statement	48	48	100.0
branch	10	10	100.0
condition	8	10	80.0
subroutine	9	9	100.0
pod	2	2	100.0
total	77	79	97.4

line	stmt	bran	cond	sub	pod	time	code
1							package URI::Find::Delimited;
2
3	1			1		518	use strict;
	1					2
	1					32
4
5	1			1		4	use vars qw( $VERSION );
	1					1
	1					46
6							$VERSION = '0.03';
7
8	1			1		15	use base qw(URI::Find);
	1					1
	1					494
9
10							# For 5.005_03 compatibility (copied from URI::Find::Schemeless)
11	1			1		5821	use URI::Find ();
	1					1
	1					12
12	1			1		390	use URI::URL;
	1					2589
	1					353
13
14							=head1 NAME
15
16							URI::Find::Delimited - Find URIs which may be wrapped in enclosing delimiters.
17
18							=head1 DESCRIPTION
19
20							Works like L, but is prepared for URIs in your text to be
21							wrapped in a pair of delimiters and optionally have a title. This will
22							be useful for processing text that already has some minimal markup in
23							it, like bulletin board posts or wiki text.
24
25							=head1 SYNOPSIS
26
27							my $finder = URI::Find::Delimited->new;
28							my $text = "This is a [http://the.earth.li/ titled link].";
29							$finder->find(\$text);
30							print $text;
31
32							=head1 METHODS
33
34							=over 4
35
36							=item B
37
38							my $finder = URI::Find::Delimited->new(
39							callback => \&callback,
40							delimiter_re => [ '\[', '\]' ],
41							ignore_quoted => 1 # defaults to 0
42							);
43
44							All arguments are optional; defaults are provided (see below).
45
46							Creates a new URI::Find::Delimited object. This object works similarly
47							to a L object, but as well as just looking for URIs it is also
48							aware of the concept of a wrapped, titled URI. These look something like
49
50							[http://foo.com/ the foo website]
51
52							where:
53
54							=over 4
55
56							=item * C<[> is the opening delimiter
57
58							=item * C<]> is the closing delimiter
59
60							=item * C is the URI
61
62							=item * C is the title
63
64							=item * the URI and title are separated by spaces and/or tabs
65
66							=back
67
68							The URI::Find::Delimited object will extract each of these parts
69							separately and pass them to your callback.
70
71							=over 4
72
73							=item B
74
75							C is a function which is called on each URI found. It is
76							passed five arguments: the opening delimiter (if found), the closing
77							delimiter (if found), the URI, the title (if found), and any
78							whitespace found between the URI and title.
79
80							The return value of the callback will replace the original URI in the
81							text.
82
83							If you do not supply your own callback, the object will create a
84							default one which will put your URIs in 'a href' tags using the URI
85							for the target and the title for the link text. If no title is
86							provided for a URI then the URI itself will be used as the title. If
87							the delimiters aren't balanced (eg if the opening one is present but
88							no closing one is found) then the URI is treated as not being wrapped.
89
90							Note: the default callback will not remove the delimiters from the
91							text. It should be simple enough to write your own callback to remove
92							them, based on the one in the source, if that's what you want. In fact
93							there's an example in this distribution, in C.
94
95							=item B
96
97							The C parameter is optional. If you do supply it then it
98							should be a ref to an array containing two regexes. It defaults to
99							using single square brackets as the delimiters.
100
101							Don't use capturing groupings C<( )> in your delimiters or things
102							will break. Use non-capturing C<(?: )> instead.
103
104							=item B
105
106							If the C parameter is supplied and set to a true value,
107							then any URIs immediately preceded with a double-quote character will
108							not be matched, ie your callback will not be executed for them and
109							they'll be treated just as normal text.
110
111							This is a bit of a hack but it's in here because I need to be able to
112							ignore things like
113
114
115
116							A better implementation may happen at some point.
117
118							=back
119
120							=cut
121
122							sub new {
123	7			7	1	2073	my ($class, %args) = @_;
124
125	7					14	my ( $callback, $delimiter_re, $ignore_quoted ) =
126							@args{ qw( callback delimiter_re ignore_quoted ) };
127
128	7	100				18	unless (defined $callback) {
129							$callback = sub {
130	14			14		22	my ($open, $close, $uri, $title, $whitespace) = @_;
131	14	100	66			39	if ( $open && $close ) {
132	4		66			8	$title \|\|= $uri;
133	4					19	qq\|$open$title$close\|;
134							} else {
135	10					75	qq\|$open$uri$whitespace$title$close\|;
136							}
137	6					25	};
138							}
139	7		100			26	$delimiter_re \|\|= [ '\[', '\]' ];
140
141	7					24	my $self = bless { callback => $callback,
142							delimiter_re => $delimiter_re,
143							ignore_quoted => $ignore_quoted
144							}, $class;
145	7					16	return $self;
146							}
147
148							sub find {
149	16			16	1	3006	my($self, $r_text) = @_;
150
151	16					16	my $urlsfound = 0;
152
153	16					35	URI::URL::strict(1); # Don't assume any old thing followed by : is a scheme
154
155	16					72	my $uri_re = $self->uri_re;
156	16	100				188	my $prefix_re = $self->{ignore_quoted} ? '(?
157	16					22	my $open_re = $self->{delimiter_re}[0];
158	16					19	my $close_re = $self->{delimiter_re}[1];
159
160							# Note we only allow spaces and tabs, not all whitespace, between a URI
161							# and its title. Also we disallow newlines in the title. These are
162							# both to avoid the bug where $uri1\n$uri2 leads to $uri2 being considered
163							# as part of the title, and thus not wrapped.
164	1			1		486	$$r_text =~ s{$prefix_re # maybe don't match things preceded by a "
	1					7
	1					9
	16					310
165							(?:
166							($open_re) # opening delimiter
167							($uri_re) # the URI itself
168							([ \t]*) # optional whitespace between URI and title
169							((?<=[ \t])[^\n$close_re]+)? #title if there was whitespace
170							($close_re) # closing delimiter
171							\|
172							($uri_re) # just the URI itself
173							)
174							}{
175	16					892	my ($open, $uri_match, $whitespace, $title, $close, $just_uri) =
176							($1, $2, $3, $4, $5, $6);
177	16	100				32	$uri_match = $just_uri if $just_uri;
178	16					25	foreach ( $open, $whitespace, $title, $close ) {
179	64		100			143	$_ \|\|= "";
180							}
181	16					27	my $orig_text = qq\|$open$uri_match$whitespace$title$close\|;
182
183	16	100				41	if( my $uri = $self->_is_uri( \$uri_match ) ) { # if not a false alarm
184	15					6149	$urlsfound++;
185	15					27	$self->{callback}->($open,$close,$uri_match,$title,$whitespace);
186							} else {
187	1					786	$orig_text;
188							}
189							}egx;
190
191	16					14451	return $urlsfound;
192							}
193
194							=head1 SEE ALSO
195
196							L.
197
198							=head1 AUTHOR
199
200							Kake Pugh (kake@earth.li).
201
202							=head1 COPYRIGHT
203
204							Copyright (C) 2003 Kake Pugh. All Rights Reserved.
205
206							This module is free software; you can redistribute it and/or modify it
207							under the same terms as Perl itself.
208
209							=head1 CREDITS
210
211							Tim Bagot helped me stop faffing over the name, by pointing out that
212							RFC 2396 Appendix E uses "delimited". Dave Hinton helped me fix the
213							regex to make it work for delimited URIs with no title. Nick Cleaton
214							helped me make C work. Some of the code was taken from
215							L.
216
217							=cut
218
219							1;