File Coverage

blib/lib/Net/Gemini.pm
Criterion Covered Total %
statement 162 193 83.9
branch 68 96 70.8
condition 20 25 80.0
subroutine 28 28 100.0
pod 13 13 100.0
total 291 355 81.9


line stmt bran cond sub pod time code
1             # -*- Perl -*-
2             #
3             # a Gemini protocol client
4             #
5             # "The conjunction of Jupiter with one of the stars of Gemini, which
6             # 'we ourselves have seen' (1.6.343b30) has been dated in recent years
7             # to December 337 BC."
8             # -- Malcolm Wilson. Structure and Method in Aristotle's Meteorologica.
9              
10             # NOTE this silently accepts URI with userinfo; those probably
11             # should be failed?
12             #
13             # KLUGE this may break if the URI module ever gets URI/gemini.pm
14             package URI::gemini {
15 21     21   1872268 use URI;
  21         153798  
  21         691  
16 21     21   11133 use parent 'URI::_server';
  21         7092  
  21         106  
17 107     107   12332 sub default_port { 1965 }
18 19     19   76 sub userinfo { return undef } # gemini has no userinfo
19 19     19   136838 sub secure { 1 }
20              
21             sub canonical {
22 108     108   15063 my $self = shift;
23 108         405 my $other = $self->SUPER::canonical;
24 108         4472 $self->SUPER::userinfo(undef); # gemini has no userinfo
25              
26 108   33     9020 my $slash_path =
27             defined( $other->authority )
28             && !length( $other->path )
29             && !defined( $other->query );
30              
31 108 50       5633 if ($slash_path) {
32 0 0       0 $other = $other->clone if $other == $self;
33 0         0 $other->path("/");
34             }
35 108         324 $other;
36             }
37             }
38              
39             package Net::Gemini;
40             our $VERSION = '0.07';
41 21     21   144847 use strict;
  21         43  
  21         437  
42 21     21   313 use warnings;
  21         62  
  21         765  
43 21     21   12925 use Encode ();
  21         205047  
  21         556  
44 21     21   143 use Exporter 'import';
  21         42  
  21         591  
45 21     21   1029 use IO::Socket::SSL;
  21         89584  
  21         168  
46 21     21   3317 use Net::SSLeay;
  21         43  
  21         846  
47 21     21   9807 use Parse::MIME 'parse_mime_type';
  21         27541  
  21         46679  
48              
49             our @EXPORT_OK = qw(gemini_request);
50              
51             sub _DEFAULT_BUFSIZE () { 4096 }
52             sub _DEFAULT_MAX_CONTENT () { 2097152 }
53             sub _DEFAULT_REDIRECTS () { 5 }
54             sub _DEFAULT_REDIRECT_SLEEP () { 1 }
55              
56 19     19 1 1013 sub code { $_[0]{_code} } # 0..6 response code
57 3     3 1 77 sub content { $_[0]{_content} } # NOTE only after certain calls and codes
58 18     18 1 431 sub error { $_[0]{_error} } # error message for 0 code
59 17     17 1 323 sub host { $_[0]{_host} }
60 22     22 1 408 sub meta { $_[0]{_meta} }
61 3     3 1 74 sub mime { $_[0]{_mime} } # NOTE only after certain calls and codes
62 17     17 1 102 sub port { $_[0]{_port} }
63 17     17 1 442 sub socket { $_[0]{_socket} }
64 18     18 1 13724 sub status { $_[0]{_status} } # two digit '1x', '2x', ... response code
65 19     19 1 2095 sub uri { $_[0]{_uri} }
66              
67             # see VERIFICATION below; the caller should supply a custom callback.
68             # the default is thus "Trust On Almost Any Use" (TOAAU) or similar to
69             # what gg(1) of gmid does
70 84     84   664 sub _verify_ssl { 1 }
71              
72             # minimal method to get a resource (see also ->request)
73             sub get {
74 145     145 1 60340 my ( $class, $source, %param ) = @_;
75 145         757 my %obj;
76 145 100       571 unless ( defined $source ) {
77 19         95 @obj{qw(_code _error)} = ( 0, "source is not defined" );
78 19         171 goto BLESSING;
79             }
80              
81 126         3821 $obj{_uri} = URI->new($source);
82 126 100       51760 unless ( $obj{_uri}->scheme eq 'gemini' ) {
83 19         2128 @obj{qw(_code _error)} = ( 0, "could not parse '$source'" );
84 19         133 goto BLESSING;
85             }
86 107         4932 @obj{qw/_host _port/} = ( $obj{_uri}->host, $obj{_uri}->port );
87              
88 107         12200 my $yuri = $obj{_uri}->canonical;
89 107 100       509 if ( length $yuri > 1024 ) {
90 19         209 @obj{qw(_code _error)} = ( 0, "URI is too long" );
91 19         114 goto BLESSING;
92             }
93              
94             # VERIFICATION is based on the following though much remains up to
95             # the caller to manage
96             # gemini://makeworld.space/gemlog/2020-07-03-tofu-rec.gmi
97             # gemini://alexschroeder.ch/page/2020-07-20%20Does%20a%20Gemini%20certificate%20need%20a%20Common%20Name%20matching%20the%20domain%3F
98             eval {
99             $obj{_socket} = IO::Socket::SSL->new(
100             SSL_hostname => $obj{_host}, # SNI
101             ( $param{tofu} ? ( SSL_verifycn_scheme => 'none' ) : () ),
102             SSL_verify_callback => sub {
103 84     84   1632712 my ( $ok, $ctx_store, $certname, $error, $cert, $depth ) = @_;
104 84 50       419 if ( $depth != 0 ) {
105 0 0       0 return 1 if $param{tofu};
106 0         0 return $ok;
107             }
108             ( $param{verify_ssl} || \&_verify_ssl )->(
109 84   50     7200 @obj{qw(_host _port)},
110             Net::SSLeay::X509_get_fingerprint( $cert, 'sha256' ),
111             Net::SSLeay::P_ASN1_TIME_get_isotime( Net::SSLeay::X509_get_notBefore($cert) ),
112             Net::SSLeay::P_ASN1_TIME_get_isotime( Net::SSLeay::X509_get_notAfter($cert) ),
113             $ok,
114             $cert
115             );
116             },
117 73         2069 ( exists $param{ssl} ? %{ $param{ssl} } : () ),
118             PeerHost => $obj{_host},
119             PeerPort => $obj{_port},
120 88 100       2447 ) or die $!;
    100          
    50          
121 69         124058 1;
122 88 100       807 } or do {
123 19         9614 @obj{qw(_code _error)} = ( 0, "IO::Socket::SSL failed: $@" );
124 19         171 goto BLESSING;
125             };
126              
127 69         729 binmode $obj{_socket}, ':raw';
128              
129 69         753 my $n = syswrite $obj{_socket}, "$yuri\r\n";
130 69 50       11133 unless ( defined $n ) {
131 0         0 @obj{qw(_code _error)} = ( 0, "send URI failed: $!" );
132 0         0 goto BLESSING;
133             }
134             # KLUGE we're done with the connection as a writer at this point,
135             # but IO::Socket::SSL does not appear to offer a public means to
136             # only call shutdown and nothing else. using this is a bit risky
137             # should the IO::Socket::SSL internals change
138 0         0 Net::SSLeay::shutdown( ${ *{ $obj{_socket} } }{'_SSL_object'} )
  0         0  
139 69 50       367 if $param{early_shutdown};
140              
141             # get the STATUS SPACE header response (and, probably, more)
142 69         685 $obj{_buf} = '';
143 69         174 while (1) {
144 145   100     3904 my $n = sysread $obj{_socket}, my $buf, $param{bufsize} || _DEFAULT_BUFSIZE;
145 145 50       36252950 unless ( defined $n ) {
146 0         0 @obj{qw(_code _error)} = ( 0, "recv response failed: $!" );
147 0         0 goto BLESSING;
148             }
149 145 50       698 if ( $n == 0 ) {
150 0         0 @obj{qw(_code _error)} = ( 0, "recv EOF" );
151 0         0 goto BLESSING;
152             }
153 145         1177 $obj{_buf} .= $buf;
154 145 100       1431 last if length $obj{_buf} >= 3;
155             }
156             # NOTE this is sloppy; there are fewer "full two digit status codes"
157             # defined in the appendix, e.g. only 10, 11, 20, 30, 31, 40, ...
158             # on the other hand, this supports any new extensions to the
159             # existing numbers
160 69 100       3461 if ( $obj{_buf} =~ m/^(([1-6])[0-9])[ ]/ ) {
161 51         1322 @obj{qw(_status _code)} = ( $1, $2 );
162 51         485 substr $obj{_buf}, 0, 3, '';
163             } else {
164             @obj{qw(_code _error)} =
165 18         1404 ( 0, "invalid response " . sprintf "%vx", substr $obj{_buf}, 0, 3 );
166 18         684 goto BLESSING;
167             }
168              
169             # META -- at most 1024 characters, followed by \r\n. the loop is in
170             # the event the server is being naughty and trickling bytes in one
171             # by one (probably you will want a timeout somewhere, or an async
172             # version of this code)
173 51   100     1006 my $bufsize = $param{bufsize} || _DEFAULT_BUFSIZE;
174 51         161 while (1) {
175 18559 100       55302 if ( $obj{_buf} =~ m/^(.{0,1024}?)\r\n/ ) {
176 33         461 $obj{_meta} = $1;
177 33         357 my $len = length $obj{_meta};
178 33 100       380 if ( $len == 0 ) {
179             # special case mentioned in the specification
180 2 50       14 $obj{_meta} = 'text/gemini;charset=utf-8' if $obj{_code} == 2;
181             } else {
182             eval {
183 31         868 $obj{_meta} = Encode::decode( 'UTF-8', $obj{_meta}, Encode::FB_CROAK );
184 31         10140 1;
185 31 50       148 } or do {
186 0         0 @obj{qw(_code _error)} = ( 0, "failed to decode meta: $@" );
187 0         0 goto BLESSING;
188             };
189             # another special case (RFC 2045 says that these things
190             # are not case sensitive, hence the (?i) despite the
191             # gemini specification saying "text/")
192 31 100 66     1080 if ( $obj{_code} == 2
      100        
193             and $obj{_meta} =~ m{^(?i)text/}
194             and $obj{_meta} !~ m/(?i)charset=/ ) {
195 18         91 $obj{_meta} .= ';charset=utf-8';
196             }
197             }
198 33         106 substr $obj{_buf}, 0, $len + 2, ''; # +2 for the \r\n
199 33         173 last;
200             } else {
201 18526         25715 my $len = length $obj{_buf};
202 18526 100       34042 if ( $len > 1024 ) {
203 18         846 @obj{qw(_code _error)} = ( 0, "meta is too long" );
204 18         558 goto BLESSING;
205             }
206 18508         24436 my $buf;
207 18508         53035 my $n = sysread $obj{_socket}, $buf, $bufsize;
208 18508 50       3905116 unless ( defined $n ) {
209 0         0 @obj{qw(_code _error)} = ( 0, "recv response failed: $!" );
210 0         0 goto BLESSING;
211             }
212 18508 50       36460 if ( $n == 0 ) {
213 0         0 @obj{qw(_code _error)} = ( 0, "recv EOF" );
214 0         0 goto BLESSING;
215             }
216 18508         38446 $obj{_buf} .= $buf;
217             }
218             }
219              
220             BLESSING:
221 145 100 100     2537 close $obj{_socket} if defined $obj{_socket} and $obj{_code} != 2;
222 145         32629 bless( \%obj, $class ), $obj{_code};
223             }
224              
225             # utility function that handles redirects and various means of content
226             # collection
227             sub gemini_request {
228 7     7 1 127 my ( $source, %options ) = @_;
229             $options{max_redirects} = _DEFAULT_REDIRECTS
230 7 100       77 unless exists $options{max_redirects};
231             $options{redirect_delay} = _DEFAULT_REDIRECT_SLEEP
232 7 100       47 unless exists $options{redirect_delay};
233             $options{max_size} = _DEFAULT_MAX_CONTENT
234 7 100       38 unless exists $options{max_size};
235              
236 7         29 my ( $gem, $code );
237 7         17 my $redirects = 0;
238             REQUEST:
239             ( $gem, $code ) = Net::Gemini->get( $source,
240 15 100       272 ( exists $options{param} ? %{ $options{param} } : () ) );
  1         7  
241 15 100 66     149 if ( $code == 2 ) {
    100          
242 5         16 my $len = length $gem->{_buf};
243 5   100     35 my $bufsize = $options{bufsize} || _DEFAULT_BUFSIZE;
244             # this can make uninit noise for a meta of ";" which might be
245             # worth an upstream patch?
246 5         30 $gem->{_mime} = [ parse_mime_type( $gem->meta ) ];
247 5 100       370 if ( exists $options{content_callback} ) {
248 1 50       27 if ($len) {
249 1 50       14 $options{content_callback}->( $gem->{_buf}, $len, $gem ) or goto CLEANUP;
250             }
251 0         0 while (1) {
252 0         0 my $buf;
253 0         0 $len = sysread $gem->{_socket}, $buf, $bufsize;
254 0 0       0 if ( !defined $len ) {
    0          
255 0         0 die "sysread failed: $!\n";
256             } elsif ( $len == 0 ) {
257 0         0 last;
258             }
259 0 0       0 $options{content_callback}->( $buf, $len, $gem ) or goto CLEANUP;
260             }
261             } else {
262 4 100       21 if ($len) {
263 3 50       14 if ( $len > $options{max_size} ) {
264 0         0 $gem->{_content} = substr $gem->{_buf}, 0, $options{max_size};
265 0         0 @{$gem}{qw(_code _error)} = ( 0, 'max_size' );
  0         0  
266 0         0 goto CLEANUP;
267             }
268 3         39 $gem->{_content} = $gem->{_buf};
269 3         18 $options{max_size} -= $len;
270             }
271 4         8 while (1) {
272 7         13 my $buf;
273 7         29 $len = sysread $gem->{_socket}, $buf, $bufsize;
274 7 50       332 if ( !defined $len ) {
    100          
275 0         0 die "sysread failed: $!\n";
276             } elsif ( $len == 0 ) {
277 3         15 last;
278             }
279 4 100       15 if ( $len > $options{max_size} ) {
280 1         19 $gem->{_content} .= substr $buf, 0, $options{max_size};
281 1         4 @{$gem}{qw(_code _error)} = ( 0, 'max_size' );
  1         18  
282 1         27 goto CLEANUP;
283             }
284 3         17 $gem->{_content} .= $buf;
285 3         18 $options{max_size} -= $len;
286             }
287             }
288             } elsif ( $code == 3 and ++$redirects <= $options{max_redirects} ) {
289             # a '31' permanent redirect should result in us not requesting
290             # the old URL again, but that would require more code here for
291             # something that is probably rare
292 8         36 my $new = $gem->{_meta};
293 8         65 $source = URI->new_abs( $new, $gem->{_uri} );
294 8         5758425 select( undef, undef, undef, $options{redirect_delay} );
295 8         442 goto REQUEST;
296             }
297             CLEANUP:
298 7         1476 undef $gem->{_buf};
299 7         30 close $gem->{_socket};
300 7         1616 return $gem, $code;
301             }
302              
303             # drain what remains (if anything) via a callback interface. assumes
304             # that a ->get call has been made
305             sub getmore {
306 18     18 1 126 my ( $self, $callback, %param ) = @_;
307              
308 18         39 my $len = length $self->{_buf};
309 18 100       237 if ($len) {
310 17 50       102 $callback->( $self->{_buf}, $len ) or return;
311 17         153 undef $self->{_buf};
312             }
313              
314 18   100     313 my $bufsize = $param{bufsize} || _DEFAULT_BUFSIZE;
315 18         53 while (1) {
316 18         36 my $buf;
317 18         125 $len = sysread $self->{_socket}, $buf, $bufsize;
318 18 50       1673 if ( !defined $len ) {
    100          
319 0         0 die "sysread failed: $!\n";
320             } elsif ( $len == 0 ) {
321 17         391 last;
322             }
323 1 50       5 $callback->( $buf, $len ) or return;
324             }
325 17         136 close $self->{_socket};
326             }
327              
328             1;
329             __END__