File Coverage

blib/lib/URI/Find/Delimited.pm
Criterion Covered Total %
statement 22 42 52.3
branch 1 10 10.0
condition 1 10 10.0
subroutine 6 7 85.7
pod 2 2 100.0
total 32 71 45.0


line stmt bran cond sub pod time code
1             package URI::Find::Delimited;
2              
3 1     1   1787 use strict;
  1         2  
  1         43  
4              
5 1     1   5 use vars qw( $VERSION );
  1         1  
  1         58  
6             $VERSION = '0.02';
7              
8 1     1   22 use base qw(URI::Find);
  1         2  
  1         939  
9              
10             # For 5.005_03 compatibility (copied from URI::Find::Schemeless)
11 1     1   17814 use URI::Find ();
  1         2  
  1         592  
12              
13             =head1 NAME
14              
15             URI::Find::Delimited - Find URIs which may be wrapped in enclosing delimiters.
16              
17             =head1 DESCRIPTION
18              
19             Works like L, but is prepared for URIs in your text to be
20             wrapped in a pair of delimiters and optionally have a title. This will
21             be useful for processing text that already has some minimal markup in
22             it, like bulletin board posts or wiki text.
23              
24             =head1 SYNOPSIS
25              
26             my $finder = URI::Find::Delimited->new;
27             my $text = "This is a [http://the.earth.li/ titled link].";
28             $finder->find(\$text);
29             print $text;
30              
31             =head1 METHODS
32              
33             =over 4
34              
35             =item B
36              
37             my $finder = URI::Find::Delimited->new(
38             callback => \&callback,
39             delimiter_re => [ '\[', '\]' ],
40             ignore_quoted => 1 # defaults to 0
41             );
42              
43             All arguments are optional; defaults are provided (see below).
44              
45             Creates a new URI::Find::Delimited object. This object works similarly
46             to a L object, but as well as just looking for URIs it is also
47             aware of the concept of a wrapped, titled URI. These look something like
48              
49             [http://foo.com/ the foo website]
50              
51             where:
52              
53             =over 4
54              
55             =item * C<[> is the opening delimiter
56              
57             =item * C<]> is the closing delimiter
58              
59             =item * C is the URI
60              
61             =item * C is the title
62              
63             =item * the URI and title are separated by spaces and/or tabs
64              
65             =back
66              
67             The URI::Find::Delimited object will extract each of these parts
68             separately and pass them to your callback.
69              
70             =over 4
71              
72             =item B
73              
74             C is a function which is called on each URI found. It is
75             passed five arguments: the opening delimiter (if found), the closing
76             delimiter (if found), the URI, the title (if found), and any
77             whitespace found between the URI and title.
78              
79             The return value of the callback will replace the original URI in the
80             text.
81              
82             If you do not supply your own callback, the object will create a
83             default one which will put your URIs in 'a href' tags using the URI
84             for the target and the title for the link text. If no title is
85             provided for a URI then the URI itself will be used as the title. If
86             the delimiters aren't balanced (eg if the opening one is present but
87             no closing one is found) then the URI is treated as not being wrapped.
88              
89             Note: the default callback will not remove the delimiters from the
90             text. It should be simple enough to write your own callback to remove
91             them, based on the one in the source, if that's what you want. In fact
92             there's an example in this distribution, in C.
93              
94             =item B
95              
96             The C parameter is optional. If you do supply it then it
97             should be a ref to an array containing two regexes. It defaults to
98             using single square brackets as the delimiters.
99              
100             Don't use capturing groupings C<( )> in your delimiters or things
101             will break. Use non-capturing C<(?: )> instead.
102              
103             =item B
104              
105             If the C parameter is supplied and set to a true value,
106             then any URIs immediately preceded with a double-quote character will
107             not be matched, ie your callback will not be executed for them and
108             they'll be treated just as normal text.
109              
110             This is kinda lame but it's in here because I need to be able to
111             ignore things like
112              
113            
114              
115             A better implementation may happen at some point.
116              
117             =back
118              
119             =cut
120              
121             sub new {
122 1     1 1 492 my ($class, %args) = @_;
123              
124 1         3 my ( $callback, $delimiter_re, $ignore_quoted ) =
125             @args{ qw( callback delimiter_re ignore_quoted ) };
126              
127 1 50       5 unless (defined $callback) {
128             $callback = sub {
129 0     0   0 my ($open, $close, $uri, $title, $whitespace) = @_;
130 0 0 0     0 if ( $open && $close ) {
131 0   0     0 $title ||= $uri;
132 0         0 qq|$open$title$close|;
133             } else {
134 0         0 qq|$open$uri$whitespace$title$close|;
135             }
136 1         7 };
137             }
138 1   50     10 $delimiter_re ||= [ '\[', '\]' ];
139              
140 1         6 my $self = bless { callback => $callback,
141             delimiter_re => $delimiter_re,
142             ignore_quoted => $ignore_quoted
143             }, $class;
144 1         4 return $self;
145             }
146              
147             sub find {
148 1     1 1 11 my($self, $r_text) = @_;
149              
150 1         2 my $urlsfound = 0;
151              
152 1         166 URI::URL::strict(1); # Don't assume any old thing followed by : is a scheme
153              
154 0           my $uri_re = $self->uri_re;
155 0 0         my $prefix_re = $self->{ignore_quoted} ? '(?
156 0           my $open_re = $self->{delimiter_re}[0];
157 0           my $close_re = $self->{delimiter_re}[1];
158              
159             # Note we only allow spaces and tabs, not all whitespace, between a URI
160             # and its title. Also we disallow newlines *in* the title. These are
161             # both to avoid the bug where $uri1\n$uri2 leads to $uri2 being considered
162             # as part of the title, and thus not wrapped.
163 0           $$r_text =~ s{$prefix_re # maybe don't match things preceded by a "
164             (?:
165             ($open_re) # opening delimiter
166             ($uri_re) # the URI itself
167             ([ \t]*) # optional whitespace between URI and title
168             ((?<=[ \t])[^\n$close_re]+)? #title if there was whitespace
169             ($close_re) # closing delimiter
170             |
171             ($uri_re) # just the URI itself
172             )
173             }{
174 0           my ($open, $uri_match, $whitespace, $title, $close, $just_uri) =
175             ($1, $2, $3, $4, $5, $6);
176 0 0         $uri_match = $just_uri if $just_uri;
177 0           foreach ( $open, $whitespace, $title, $close ) {
178 0   0       $_ ||= "";
179             }
180 0           my $orig_text = qq|$open$uri_match$whitespace$title$close|;
181              
182 0 0         if( my $uri = $self->_is_uri( \$uri_match ) ) { # if not a false alarm
183 0           $urlsfound++;
184 0           $self->{callback}->($open,$close,$uri_match,$title,$whitespace);
185             } else {
186 0           $orig_text;
187             }
188             }egx;
189              
190 0           return $urlsfound;
191             }
192              
193             =head1 SEE ALSO
194              
195             L.
196              
197             =head1 AUTHOR
198              
199             Kake Pugh (kake@earth.li).
200              
201             =head1 COPYRIGHT
202              
203             Copyright (C) 2003 Kake Pugh. All Rights Reserved.
204              
205             This module is free software; you can redistribute it and/or modify it
206             under the same terms as Perl itself.
207              
208             =head1 CREDITS
209              
210             Tim Bagot helped me stop faffing over the name, by pointing out that
211             RFC 2396 Appendix E uses "delimited". Dave Hinton helped me fix the
212             regex to make it work for delimited URIs with no title. Nick Cleaton
213             helped me make C work. Some of the code was taken from
214             L.
215              
216             =cut
217              
218             1;