File Coverage

blib/lib/URI/Find/Delimited.pm
Criterion Covered Total %
statement 48 48 100.0
branch 10 10 100.0
condition 8 10 80.0
subroutine 9 9 100.0
pod 2 2 100.0
total 77 79 97.4


line stmt bran cond sub pod time code
1             package URI::Find::Delimited;
2              
3 1     1   518 use strict;
  1         2  
  1         32  
4              
5 1     1   4 use vars qw( $VERSION );
  1         1  
  1         46  
6             $VERSION = '0.03';
7              
8 1     1   15 use base qw(URI::Find);
  1         1  
  1         494  
9              
10             # For 5.005_03 compatibility (copied from URI::Find::Schemeless)
11 1     1   5821 use URI::Find ();
  1         1  
  1         12  
12 1     1   390 use URI::URL;
  1         2589  
  1         353  
13              
14             =head1 NAME
15              
16             URI::Find::Delimited - Find URIs which may be wrapped in enclosing delimiters.
17              
18             =head1 DESCRIPTION
19              
20             Works like L, but is prepared for URIs in your text to be
21             wrapped in a pair of delimiters and optionally have a title. This will
22             be useful for processing text that already has some minimal markup in
23             it, like bulletin board posts or wiki text.
24              
25             =head1 SYNOPSIS
26              
27             my $finder = URI::Find::Delimited->new;
28             my $text = "This is a [http://the.earth.li/ titled link].";
29             $finder->find(\$text);
30             print $text;
31              
32             =head1 METHODS
33              
34             =over 4
35              
36             =item B
37              
38             my $finder = URI::Find::Delimited->new(
39             callback => \&callback,
40             delimiter_re => [ '\[', '\]' ],
41             ignore_quoted => 1 # defaults to 0
42             );
43              
44             All arguments are optional; defaults are provided (see below).
45              
46             Creates a new URI::Find::Delimited object. This object works similarly
47             to a L object, but as well as just looking for URIs it is also
48             aware of the concept of a wrapped, titled URI. These look something like
49              
50             [http://foo.com/ the foo website]
51              
52             where:
53              
54             =over 4
55              
56             =item * C<[> is the opening delimiter
57              
58             =item * C<]> is the closing delimiter
59              
60             =item * C is the URI
61              
62             =item * C is the title
63              
64             =item * the URI and title are separated by spaces and/or tabs
65              
66             =back
67              
68             The URI::Find::Delimited object will extract each of these parts
69             separately and pass them to your callback.
70              
71             =over 4
72              
73             =item B
74              
75             C is a function which is called on each URI found. It is
76             passed five arguments: the opening delimiter (if found), the closing
77             delimiter (if found), the URI, the title (if found), and any
78             whitespace found between the URI and title.
79              
80             The return value of the callback will replace the original URI in the
81             text.
82              
83             If you do not supply your own callback, the object will create a
84             default one which will put your URIs in 'a href' tags using the URI
85             for the target and the title for the link text. If no title is
86             provided for a URI then the URI itself will be used as the title. If
87             the delimiters aren't balanced (eg if the opening one is present but
88             no closing one is found) then the URI is treated as not being wrapped.
89              
90             Note: the default callback will not remove the delimiters from the
91             text. It should be simple enough to write your own callback to remove
92             them, based on the one in the source, if that's what you want. In fact
93             there's an example in this distribution, in C.
94              
95             =item B
96              
97             The C parameter is optional. If you do supply it then it
98             should be a ref to an array containing two regexes. It defaults to
99             using single square brackets as the delimiters.
100              
101             Don't use capturing groupings C<( )> in your delimiters or things
102             will break. Use non-capturing C<(?: )> instead.
103              
104             =item B
105              
106             If the C parameter is supplied and set to a true value,
107             then any URIs immediately preceded with a double-quote character will
108             not be matched, ie your callback will not be executed for them and
109             they'll be treated just as normal text.
110              
111             This is a bit of a hack but it's in here because I need to be able to
112             ignore things like
113              
114            
115              
116             A better implementation may happen at some point.
117              
118             =back
119              
120             =cut
121              
122             sub new {
123 7     7 1 2073 my ($class, %args) = @_;
124              
125 7         14 my ( $callback, $delimiter_re, $ignore_quoted ) =
126             @args{ qw( callback delimiter_re ignore_quoted ) };
127              
128 7 100       18 unless (defined $callback) {
129             $callback = sub {
130 14     14   22 my ($open, $close, $uri, $title, $whitespace) = @_;
131 14 100 66     39 if ( $open && $close ) {
132 4   66     8 $title ||= $uri;
133 4         19 qq|$open$title$close|;
134             } else {
135 10         75 qq|$open$uri$whitespace$title$close|;
136             }
137 6         25 };
138             }
139 7   100     26 $delimiter_re ||= [ '\[', '\]' ];
140              
141 7         24 my $self = bless { callback => $callback,
142             delimiter_re => $delimiter_re,
143             ignore_quoted => $ignore_quoted
144             }, $class;
145 7         16 return $self;
146             }
147              
148             sub find {
149 16     16 1 3006 my($self, $r_text) = @_;
150              
151 16         16 my $urlsfound = 0;
152              
153 16         35 URI::URL::strict(1); # Don't assume any old thing followed by : is a scheme
154              
155 16         72 my $uri_re = $self->uri_re;
156 16 100       188 my $prefix_re = $self->{ignore_quoted} ? '(?
157 16         22 my $open_re = $self->{delimiter_re}[0];
158 16         19 my $close_re = $self->{delimiter_re}[1];
159              
160             # Note we only allow spaces and tabs, not all whitespace, between a URI
161             # and its title. Also we disallow newlines *in* the title. These are
162             # both to avoid the bug where $uri1\n$uri2 leads to $uri2 being considered
163             # as part of the title, and thus not wrapped.
164 1     1   486 $$r_text =~ s{$prefix_re # maybe don't match things preceded by a "
  1         7  
  1         9  
  16         310  
165             (?:
166             ($open_re) # opening delimiter
167             ($uri_re) # the URI itself
168             ([ \t]*) # optional whitespace between URI and title
169             ((?<=[ \t])[^\n$close_re]+)? #title if there was whitespace
170             ($close_re) # closing delimiter
171             |
172             ($uri_re) # just the URI itself
173             )
174             }{
175 16         892 my ($open, $uri_match, $whitespace, $title, $close, $just_uri) =
176             ($1, $2, $3, $4, $5, $6);
177 16 100       32 $uri_match = $just_uri if $just_uri;
178 16         25 foreach ( $open, $whitespace, $title, $close ) {
179 64   100     143 $_ ||= "";
180             }
181 16         27 my $orig_text = qq|$open$uri_match$whitespace$title$close|;
182              
183 16 100       41 if( my $uri = $self->_is_uri( \$uri_match ) ) { # if not a false alarm
184 15         6149 $urlsfound++;
185 15         27 $self->{callback}->($open,$close,$uri_match,$title,$whitespace);
186             } else {
187 1         786 $orig_text;
188             }
189             }egx;
190              
191 16         14451 return $urlsfound;
192             }
193              
194             =head1 SEE ALSO
195              
196             L.
197              
198             =head1 AUTHOR
199              
200             Kake Pugh (kake@earth.li).
201              
202             =head1 COPYRIGHT
203              
204             Copyright (C) 2003 Kake Pugh. All Rights Reserved.
205              
206             This module is free software; you can redistribute it and/or modify it
207             under the same terms as Perl itself.
208              
209             =head1 CREDITS
210              
211             Tim Bagot helped me stop faffing over the name, by pointing out that
212             RFC 2396 Appendix E uses "delimited". Dave Hinton helped me fix the
213             regex to make it work for delimited URIs with no title. Nick Cleaton
214             helped me make C work. Some of the code was taken from
215             L.
216              
217             =cut
218              
219             1;