File Coverage

blib/lib/Net/Gemini.pm
Criterion Covered Total %
statement 170 199 85.4
branch 69 96 71.8
condition 20 25 80.0
subroutine 31 31 100.0
pod 14 14 100.0
total 304 365 83.2


line stmt bran cond sub pod time code
1             # -*- Perl -*-
2             #
3             # a Gemini protocol client
4             #
5             # "The conjunction of Jupiter with one of the stars of Gemini, which
6             # 'we ourselves have seen' (1.6.343b30) has been dated in recent years
7             # to December 337 BC."
8             # -- Malcolm Wilson. Structure and Method in Aristotle's Meteorologica.
9              
10             # NOTE this silently accepts URI with userinfo; those probably
11             # should be failed?
12             #
13             # KLUGE this may break if the URI module ever gets URI/gemini.pm
14             package URI::gemini {
15 21     21   1736617 use URI;
  21         142868  
  21         653  
16 21     21   10148 use parent 'URI::_server';
  21         5679  
  21         124  
17 107     107   11163 sub default_port { 1965 }
18 19     19   76 sub userinfo { return undef } # gemini has no userinfo
19 19     19   123120 sub secure { 1 }
20              
21             sub canonical {
22 108     108   15456 my $self = shift;
23 108         859 my $other = $self->SUPER::canonical;
24 108         4233 $self->SUPER::userinfo(undef); # gemini has no userinfo
25              
26 108   33     9992 my $slash_path =
27             defined( $other->authority )
28             && !length( $other->path )
29             && !defined( $other->query );
30              
31 108 50       5633 if ($slash_path) {
32 0 0       0 $other = $other->clone if $other == $self;
33 0         0 $other->path("/");
34             }
35 108         318 $other;
36             }
37             }
38              
39             package Net::Gemini;
40             our $VERSION = '0.08';
41 21     21   132893 use strict;
  21         62  
  21         401  
42 21     21   85 use warnings;
  21         61  
  21         505  
43 21     21   11840 use Digest::SHA 'sha256_hex';
  21         55497  
  21         1727  
44 21     21   11516 use Encode ();
  21         191986  
  21         520  
45 21     21   145 use Exporter 'import';
  21         22  
  21         612  
46 21     21   723 use IO::Socket::IP;
  21         37600  
  21         189  
47 21     21   12442 use IO::Socket::SSL;
  21         48526  
  21         151  
48 21     21   3153 use Net::SSLeay;
  21         62  
  21         975  
49 21     21   9664 use Parse::MIME 'parse_mime_type';
  21         24044  
  21         44184  
50              
51             our @EXPORT_OK = qw(gemini_request);
52              
53             sub _DEFAULT_BUFSIZE () { 4096 }
54             sub _DEFAULT_MAX_CONTENT () { 2097152 }
55             sub _DEFAULT_REDIRECTS () { 5 }
56             sub _DEFAULT_REDIRECT_SLEEP () { 1 }
57              
58 19     19 1 827 sub code { $_[0]{_code} } # 0..6 response code
59              
60             sub content {
61 3     3 1 62 $_[0]{_content};
62             } # NOTE only after certain calls and codes
63 18     18 1 192 sub error { $_[0]{_error} } # error message for 0 code
64 17     17 1 204 sub host { $_[0]{_host} }
65 17     17 1 527 sub ip { $_[0]{_ip} }
66 22     22 1 259 sub meta { $_[0]{_meta} }
67 3     3 1 72 sub mime { $_[0]{_mime} } # NOTE only after certain calls and codes
68 17     17 1 85 sub port { $_[0]{_port} }
69 17     17 1 459 sub socket { $_[0]{_socket} }
70              
71             sub status {
72 18     18 1 12824 $_[0]{_status};
73             } # two digit '1x', '2x', ... response code
74 19     19 1 152 sub uri { $_[0]{_uri} }
75              
76             # see VERIFICATION below; the caller should supply a custom callback.
77             # the default is thus "Trust On Almost Any Use" (TOAAU) or similar to
78             # what gg(1) of gmid does
79 84     84   733 sub _verify_ssl { 1 }
80              
81             # minimal method to get a resource (see also ->request)
82             sub get {
83 145     145 1 52597 my ( $class, $source, %param ) = @_;
84 145         609 my %obj;
85 145 100       786 unless ( defined $source ) {
86 19         114 @obj{qw(_code _error)} = ( 0, "source is not defined" );
87 19         171 goto BLESSING;
88             }
89              
90 126         3002 $obj{_uri} = URI->new($source);
91 126 100       48619 unless ( $obj{_uri}->scheme eq 'gemini' ) {
92 19         2014 @obj{qw(_code _error)} = ( 0, "could not parse '$source'" );
93 19         114 goto BLESSING;
94             }
95 107         4720 @obj{qw/_host _port/} = ( $obj{_uri}->host, $obj{_uri}->port );
96              
97 107         14237 my $yuri = $obj{_uri}->canonical;
98 107 100       864 if ( length $yuri > 1024 ) {
99 19         247 @obj{qw(_code _error)} = ( 0, "URI is too long" );
100 19         95 goto BLESSING;
101             }
102              
103             # VERIFICATION is based on the following though much remains up to
104             # the caller to manage
105             # gemini://makeworld.space/gemlog/2020-07-03-tofu-rec.gmi
106             # gemini://alexschroeder.ch/page/2020-07-20%20Does%20a%20Gemini%20certificate%20need%20a%20Common%20Name%20matching%20the%20domain%3F
107             eval {
108             $obj{_socket} = IO::Socket::IP->new(
109             PeerAddr => $obj{_host},
110             PeerPort => $obj{_port},
111 88 100       1998 Proto => 'tcp'
112             ) or die $!;
113 69         70858 $obj{_ip} = $obj{_socket}->peerhost;
114             IO::Socket::SSL->start_SSL(
115             $obj{_socket},
116             SSL_hostname => $obj{_host}, # SNI
117             ( $param{tofu} ? ( SSL_verifycn_scheme => 'none' ) : () ),
118             SSL_verify_callback => sub {
119 84     84   1407943 my ( $ok, $ctx_store, $certname, $error, $cert, $depth ) = @_;
120 84 50       653 if ( $depth != 0 ) {
121 0 0       0 return 1 if $param{tofu};
122 0         0 return $ok;
123             }
124             my $digest = ( $param{verify_ssl} || \&_verify_ssl )->(
125             { host => $obj{_host},
126             port => $obj{_port},
127             cert => $cert, # warning, memory address!
128             # compatible with certID function of amfora
129             digest =>
130             uc( sha256_hex( Net::SSLeay::X509_get_X509_PUBKEY($cert) ) ),
131             ip => $obj{_ip},
132 84   50     9749 notBefore => Net::SSLeay::P_ASN1_TIME_get_isotime(
133             Net::SSLeay::X509_get_notBefore($cert)
134             ),
135             notAfter => Net::SSLeay::P_ASN1_TIME_get_isotime(
136             Net::SSLeay::X509_get_notAfter($cert)
137             ),
138             okay => $ok,
139             }
140             );
141             },
142 69 100       10149 ( exists $param{ssl} ? %{ $param{ssl} } : () ),
  54 100       2507  
    50          
143             ) or die $!;
144 69         120348 1;
145 88 100       884 } or do {
146 19         25213 @obj{qw(_code _error)} = ( 0, "IO::Socket::SSL failed: $@" );
147 19         209 goto BLESSING;
148             };
149              
150 69         1525 binmode $obj{_socket}, ':raw';
151              
152 69         1050 my $n = syswrite $obj{_socket}, "$yuri\r\n";
153 69 50       13155 unless ( defined $n ) {
154 0         0 @obj{qw(_code _error)} = ( 0, "send URI failed: $!" );
155 0         0 goto BLESSING;
156             }
157              
158             # get the STATUS SPACE header response (and, probably, more)
159 69         703 $obj{_buf} = '';
160 69         533 while (1) {
161             my $n = sysread $obj{_socket}, my $buf,
162 145   100     1875 $param{bufsize} || _DEFAULT_BUFSIZE;
163 145 50       36253802 unless ( defined $n ) {
164 0         0 @obj{qw(_code _error)} = ( 0, "recv response failed: $!" );
165 0         0 goto BLESSING;
166             }
167 145 50       1115 if ( $n == 0 ) {
168 0         0 @obj{qw(_code _error)} = ( 0, "recv EOF" );
169 0         0 goto BLESSING;
170             }
171 145         658 $obj{_buf} .= $buf;
172 145 100       923 last if length $obj{_buf} >= 3;
173             }
174             # NOTE this is sloppy; there are fewer "full two digit status codes"
175             # defined in the appendix, e.g. only 10, 11, 20, 30, 31, 40, ...
176             # on the other hand, this supports any new extensions to the
177             # existing numbers
178 69 100       2371 if ( $obj{_buf} =~ m/^(([1-6])[0-9])[ ]/ ) {
179 51         1229 @obj{qw(_status _code)} = ( $1, $2 );
180 51         454 substr $obj{_buf}, 0, 3, '';
181             } else {
182             @obj{qw(_code _error)} = (
183             0,
184             "invalid response " . sprintf "%vx",
185             substr $obj{_buf},
186 18         1656 0, 3
187             );
188 18         486 goto BLESSING;
189             }
190              
191             # META -- at most 1024 characters, followed by \r\n. the loop is in
192             # the event the server is being naughty and trickling bytes in one
193             # by one (probably you will want a timeout somewhere, or an async
194             # version of this code)
195 51   100     1161 my $bufsize = $param{bufsize} || _DEFAULT_BUFSIZE;
196 51         337 while (1) {
197 18559 100       54348 if ( $obj{_buf} =~ m/^(.{0,1024}?)\r\n/ ) {
198 33         585 $obj{_meta} = $1;
199 33         107 my $len = length $obj{_meta};
200 33 100       784 if ( $len == 0 ) {
201             # special case mentioned in the specification
202 2 50       24 $obj{_meta} = 'text/gemini;charset=utf-8' if $obj{_code} == 2;
203             } else {
204             eval {
205             $obj{_meta} =
206 31         962 Encode::decode( 'UTF-8', $obj{_meta}, Encode::FB_CROAK );
207 31         10075 1;
208 31 50       239 } or do {
209 0         0 @obj{qw(_code _error)} = ( 0, "failed to decode meta: $@" );
210 0         0 goto BLESSING;
211             };
212             # another special case (RFC 2045 says that these things
213             # are not case sensitive, hence the (?i) despite the
214             # gemini specification saying "text/")
215 31 100 66     550 if ( $obj{_code} == 2
      100        
216             and $obj{_meta} =~ m{^(?i)text/}
217             and $obj{_meta} !~ m/(?i)charset=/ ) {
218 18         344 $obj{_meta} .= ';charset=utf-8';
219             }
220             }
221 33         131 substr $obj{_buf}, 0, $len + 2, ''; # +2 for the \r\n
222 33         196 last;
223             } else {
224 18526         25069 my $len = length $obj{_buf};
225 18526 100       31163 if ( $len > 1024 ) {
226 18         648 @obj{qw(_code _error)} = ( 0, "meta is too long" );
227 18         594 goto BLESSING;
228             }
229 18508         24062 my $buf;
230 18508         62928 my $n = sysread $obj{_socket}, $buf, $bufsize;
231 18508 50       3753582 unless ( defined $n ) {
232 0         0 @obj{qw(_code _error)} = ( 0, "recv response failed: $!" );
233 0         0 goto BLESSING;
234             }
235 18508 50       37700 if ( $n == 0 ) {
236 0         0 @obj{qw(_code _error)} = ( 0, "recv EOF" );
237 0         0 goto BLESSING;
238             }
239 18508         41191 $obj{_buf} .= $buf;
240             }
241             }
242              
243             BLESSING:
244 145 100 100     2346 close $obj{_socket} if defined $obj{_socket} and $obj{_code} != 2;
245 145         23785 bless( \%obj, $class ), $obj{_code};
246             }
247              
248             # utility function that handles redirects and various means of content
249             # collection
250             sub gemini_request {
251 7     7 1 144 my ( $source, %options ) = @_;
252             $options{max_redirects} = _DEFAULT_REDIRECTS
253 7 100       62 unless exists $options{max_redirects};
254             $options{redirect_delay} = _DEFAULT_REDIRECT_SLEEP
255 7 100       56 unless exists $options{redirect_delay};
256             $options{max_size} = _DEFAULT_MAX_CONTENT
257 7 100       50 unless exists $options{max_size};
258              
259 7         28 my ( $gem, $code );
260 7         29 my $redirects = 0;
261             REQUEST:
262             ( $gem, $code ) = Net::Gemini->get( $source,
263 15 100       298 ( exists $options{param} ? %{ $options{param} } : () ) );
  1         7  
264 15 100 66     159 if ( $code == 2 ) {
    100          
265 5         24 my $len = length $gem->{_buf};
266 5   100     37 my $bufsize = $options{bufsize} || _DEFAULT_BUFSIZE;
267             # this can make uninit noise for a meta of ";" which might be
268             # worth an upstream patch?
269 5         54 $gem->{_mime} = [ parse_mime_type( $gem->meta ) ];
270 5 100       352 if ( exists $options{content_callback} ) {
271 1 50       21 if ($len) {
272 1 50       22 $options{content_callback}->( $gem->{_buf}, $len, $gem )
273             or goto CLEANUP;
274             }
275 0         0 while (1) {
276 0         0 my $buf;
277 0         0 $len = sysread $gem->{_socket}, $buf, $bufsize;
278 0 0       0 if ( !defined $len ) {
    0          
279 0         0 die "sysread failed: $!\n";
280             } elsif ( $len == 0 ) {
281 0         0 last;
282             }
283 0 0       0 $options{content_callback}->( $buf, $len, $gem ) or goto CLEANUP;
284             }
285             } else {
286 4 100       27 if ($len) {
287 3 50       14 if ( $len > $options{max_size} ) {
288 0         0 $gem->{_content} = substr $gem->{_buf}, 0, $options{max_size};
289 0         0 @{$gem}{qw(_code _error)} = ( 0, 'max_size' );
  0         0  
290 0         0 goto CLEANUP;
291             }
292 3         17 $gem->{_content} = $gem->{_buf};
293 3         10 $options{max_size} -= $len;
294             }
295 4         8 while (1) {
296 7         25 my $buf;
297 7         28 $len = sysread $gem->{_socket}, $buf, $bufsize;
298 7 50       337 if ( !defined $len ) {
    100          
299 0         0 die "sysread failed: $!\n";
300             } elsif ( $len == 0 ) {
301 3         10 last;
302             }
303 4 100       13 if ( $len > $options{max_size} ) {
304 1         4 $gem->{_content} .= substr $buf, 0, $options{max_size};
305 1         3 @{$gem}{qw(_code _error)} = ( 0, 'max_size' );
  1         20  
306 1         27 goto CLEANUP;
307             }
308 3         30 $gem->{_content} .= $buf;
309 3         8 $options{max_size} -= $len;
310             }
311             }
312             } elsif ( $code == 3 and ++$redirects <= $options{max_redirects} ) {
313             # a '31' permanent redirect should result in us not requesting
314             # the old URL again, but that would require more code here for
315             # something that is probably rare
316 8         23 my $new = $gem->{_meta};
317 8         73 $source = URI->new_abs( $new, $gem->{_uri} );
318 8         5758336 select( undef, undef, undef, $options{redirect_delay} );
319 8         477 goto REQUEST;
320             }
321             CLEANUP:
322 7         1377 undef $gem->{_buf};
323 7         30 close $gem->{_socket};
324 7         1603 return $gem, $code;
325             }
326              
327             # drain what remains (if anything) via a callback interface. assumes
328             # that a ->get call has been made
329             sub getmore {
330 18     18 1 99 my ( $self, $callback, %param ) = @_;
331              
332 18         156 my $len = length $self->{_buf};
333 18 100       192 if ($len) {
334 17 50       85 $callback->( $self->{_buf}, $len ) or return;
335 17         187 undef $self->{_buf};
336             }
337              
338 18   100     242 my $bufsize = $param{bufsize} || _DEFAULT_BUFSIZE;
339 18         54 while (1) {
340 18         35 my $buf;
341 18         124 $len = sysread $self->{_socket}, $buf, $bufsize;
342 18 50       1483 if ( !defined $len ) {
    100          
343 0         0 die "sysread failed: $!\n";
344             } elsif ( $len == 0 ) {
345 17         51 last;
346             }
347 1 50       6 $callback->( $buf, $len ) or return;
348             }
349 17         102 close $self->{_socket};
350             }
351              
352             1;
353             __END__