File Coverage

blib/lib/WebService/CEPH/NetAmazonS3.pm
Criterion Covered Total %
statement 105 108 97.2
branch 26 36 72.2
condition 15 23 65.2
subroutine 19 21 90.4
pod 11 11 100.0
total 176 199 88.4


line stmt bran cond sub pod time code
1             =encoding utf8
2              
3             =head1 WebService::CEPH::NetAmazonS3
4              
5             Driver for CEPH is based on Net::Amazon::S3.
6              
7             Made rather not on the basis Net::Amazon::S3, but on the basis Net::Amazon::S3::Client
8             see POD https://metacpan.org/pod/Net::Amazon::S3::Client
9             There is a separate documentation and it is said that this is a newer interface, while in the Net::Amazon::S3, there is no link to this.
10              
11             Goes into private methods and not documented features of Net :: Amazon :: S3, due to the fact that
12             Net :: Amazon :: S3 is not well-documented in principle, and there is not enough public functionality in it.
13              
14             The stability of this solution is provided by the integration test netamazons3_integration, which in theory tests everything.
15             The problems can only be if you installed this module, then updated Net :: Amazon :: S3 to a new version that did not exist yet,
16             which broke back compatibility of private methods.
17              
18             The interface of this module is documented. Stick to what is documented, WebService :: CEPH counts on all this.
19             You can write your driver with the same interface, but with a different implementation.
20              
21             =cut
22              
23             package WebService::CEPH::NetAmazonS3;
24              
25             our $VERSION = '0.017'; # VERSION
26              
27 4     4   421255 use strict;
  4         16  
  4         116  
28 4     4   21 use warnings;
  4         8  
  4         94  
29 4     4   21 use Carp;
  4         6  
  4         193  
30 4     4   1047 use Time::Local;
  4         3442  
  4         233  
31 4     4   2339 use Net::Amazon::S3;
  4         14308788  
  4         200  
32 4     4   37 use HTTP::Status;
  4         14  
  4         1276  
33 4     4   51 use Digest::MD5 qw/md5_hex/;
  4         9  
  4         7363  
34              
35              
36             sub _time { # for mocking in tests
37             time()
38 0     0   0 }
39              
40             =head2 new
41              
42             Constructor
43              
44             protocol - 'http' or 'https'
45              
46             host - Amazon S3 host or CEPH
47              
48             bucket - (mandatory for all operations except the request for the bucket list) the name of the bucket, this bucket will be used for all object operations
49              
50             key - access key
51              
52             secret - access secret
53              
54             =cut
55              
56             sub new {
57 21     21   131614 my ($class, %args) = @_;
58              
59 21         111 my $self = bless +{}, $class;
60              
61 21   33     235 $self->{$_} = delete $args{$_} // confess "Missing $_" for (qw/protocol host key secret/);
62 21         65 $self->{bucket} = delete $args{bucket};
63              
64 21 50       63 confess "Unused arguments %args" if %args;
65 21 50       149 confess "protocol should be 'http' or 'https'" unless $self->{protocol} =~ /^https?$/;
66              
67             my $s3 = Net::Amazon::S3->new({
68             aws_access_key_id => $self->{key},
69             aws_secret_access_key => $self->{secret}, # TODO: фильтровать в логировании?
70             host => $self->{host},
71 21         964 secure => $self->{protocol} eq 'https',
72             retry => 1,
73             });
74              
75 21         41857 $self->{client} = Net::Amazon::S3::Client->new( s3 => $s3 );
76 21         1076 $self;
77             }
78              
79             =head2 _request_object
80              
81             Private method. Returns the Net :: Amazon :: S3 :: Client :: Bucket object, which can then be used. Used in code several times
82              
83             =cut
84              
85             sub _request_object {
86 10     10   33 my ($self) = @_;
87              
88 10 50       38 confess "Missing bucket" unless $self->{bucket};
89              
90 10         57 $self->{client}->bucket(name => $self->{bucket});
91             }
92              
93             =head2 get_buckets_list
94              
95             Returns buckets list
96              
97             =cut
98              
99             sub get_buckets_list {
100 0     0 1 0 my ($self) = @_;
101              
102 0         0 return $self->{client}->buckets->{buckets};
103             }
104              
105             =head2 upload_single_request
106              
107             Uploads data.
108              
109             Parameters:
110              
111             1) $self
112              
113             2) $key - object name
114              
115             3) data itself (blob)
116              
117             4) Content-Type, optional
118              
119             5) ACL, optional
120              
121             Upload an object for one request (non-multipart upload), put a custom or private ACL, add a custom x-amz-meta-md5 header, which equals md5 hex from the file
122              
123             =cut
124              
125             sub upload_single_request {
126 5     5 1 225 my ($self, $key) = (shift, shift); # after shifts: $_[0] - value, $_[1] - content-type, $_[2] - acl
127              
128 5         36 my $md5 = md5_hex($_[0]);
129 5 100 50     29 my $object = $self->_request_object->object(
130             key => $key,
131             acl_short => $_[2] || 'private',
132             $_[1] ? ( content_type => $_[1] ) : ()
133             );
134 5         2333 $object->user_metadata->{'md5'} = $md5;
135 5         79 $object->_put($_[0], length($_[0]), $md5); # private _put so we can re-use md5. only for that.
136             }
137              
138             =head2 list_multipart_uploads
139              
140             Returns a list multipart_upload
141              
142             Parameters:
143              
144             none
145              
146             Returns:
147             [
148             {
149             key => 'Upload key',
150             upload_id => 'Upload ID',
151             initiated => 'Init date',
152             initiated_epoch => same as initiated but in epoch time format
153             initiated_age_seconds => simply time() - initiated_epoch ie upload age
154             },
155             ...
156             ]
157              
158             =cut
159              
160             sub list_multipart_uploads {
161 1     1 1 279 my ($self) = @_;
162              
163 1         53 $self->{client}->bucket(name => $self->{bucket});
164              
165             my $http_request = Net::Amazon::S3::HTTPRequest->new(
166             s3 => $self->{client}->s3,
167             method => 'GET',
168 1         470 path => $self->{bucket} . '?uploads',
169             )->http_request;
170              
171 1         5912 my $xpc = $self->{client}->_send_request_xpc($http_request);
172              
173 1         64838 my @uploads;
174 1         8 my $t0 = _time();
175 1         29 foreach my $node ( $xpc->findnodes(".//s3:Upload") ) {
176              
177 1         101 my $initiated = $xpc->findvalue( ".//s3:Initiated", $node );
178              
179 1 50       349 my ($y, $m, $d, $hour, $min, $sec) = $initiated =~ /^(\d{4})\-(\d{2})\-(\d{2})T(\d{2}):(\d{2}):(\d{2})/
180             or confess "Bad date $initiated";
181 1         67 my $initiated_epoch = timegm($sec, $min, $hour, $d, $m - 1, $y); # interpret time as GMT+00 time and convert to epoch
182              
183 1         95 push @uploads, {
184             key => $xpc->findvalue( ".//s3:Key", $node ),
185             upload_id => $xpc->findvalue( ".//s3:UploadId", $node ),
186             initiated => $initiated,
187             initiated_epoch => $initiated_epoch,
188             initiated_age_seconds => $t0 - $initiated_epoch,
189             };
190              
191             }
192              
193 1         180 return \@uploads;
194             }
195              
196             =head2 delete_multipart_upload
197              
198             Deletes upload
199              
200             Parameters:
201             $key, $upload_id
202              
203              
204             =cut
205              
206             sub delete_multipart_upload {
207 1     1 1 68 my ($self, $key, $upload_id) = @_;
208              
209 1         11 $self->{client}->bucket(name => $self->{bucket});
210              
211             my $http_request = Net::Amazon::S3::Request::AbortMultipartUpload->new(
212             s3 => $self->{client}->s3,
213             bucket => $self->{bucket},
214 1         212 key => $key,
215             upload_id => $upload_id,
216             )->http_request;
217              
218 1         5750 $self->{client}->_send_request_raw($http_request);
219             }
220              
221             =head2 initiate_multipart_upload
222              
223             Initiates multipart upload
224              
225             Parameters:
226              
227             1) $self
228              
229             2) $key - object name
230              
231             3) md5 from data
232              
233             4) Content-type, optional
234              
235             5) ACL, optional
236              
237             Initiates multipart upload, sets x-amz-meta-md5 to md5 value of the file (needs to be calculated in advance and pass it as a parameter).
238             Returns a reference to a structure of an undocumented nature, which should be used later to work with this multipart upload
239              
240             =cut
241              
242             sub initiate_multipart_upload {
243 2     2 1 119 my ($self, $key, $md5, $content_type, $acl) = @_;
244              
245 2 50       22 confess "Missing bucket" unless $self->{bucket};
246              
247 2   50     13 my $object = $self->_request_object->object( key => $key, acl_short => $acl || 'private' );
248              
249             my $http_request = Net::Amazon::S3::Request::InitiateMultipartUpload->new(
250             s3 => $self->{client}->s3,
251             bucket => $self->{bucket},
252 2 100       881 key => $key,
253             headers => +{
254             'X-Amz-Meta-Md5' => $md5,
255             $content_type ? ( 'Content-type' => $content_type ) : ()
256             }
257             )->http_request;
258              
259 2         8526 my $xpc = $self->{client}->_send_request_xpc($http_request);
260 2         12545 my $upload_id = $xpc->findvalue('//s3:UploadId');
261 2 50       253 confess "Couldn't get upload id from initiate_multipart_upload response XML"
262             unless $upload_id;
263              
264 2         125 +{ key => $key, upload_id => $upload_id, object => $object, md5 => $md5};
265             }
266              
267             =head2 upload_part
268              
269             Uploads part of the data when multipart uploading
270              
271             Parameters:
272              
273             1) $self
274              
275             2) $multipart_upload - reference, obtained from initiate_multipart_upload
276              
277             3) $part_number - part number, from 1 and higher.
278              
279             Works only if parts were uploaded in turn with increasing numbers (which is natural, if it is sequential uploading,
280             and makes it impossible for parallel upload from different processes)
281              
282             Returns nothing
283              
284             =cut
285              
286             sub upload_part {
287 2     2 1 17 my ($self, $multipart_upload, $part_number) = (shift, shift, shift);
288              
289             $multipart_upload->{object}->put_part(
290             upload_id => $multipart_upload->{upload_id},
291 2         22 part_number => $part_number,
292             value => $_[0]
293             );
294              
295             # TODO:Part numbers should be in accessing order (in case someone uploads in parallel) !
296 2   100     21163 push @{$multipart_upload->{parts} ||= [] }, $part_number;
  2         38  
297 2   100     6 push @{$multipart_upload->{etags} ||= [] }, md5_hex($_[0]);
  2         38  
298             }
299              
300             =head2 complete_multipart_upload
301              
302             Finalize multipart upload
303              
304             Parameters:
305              
306             1) $self
307              
308             2) $multipart_upload - reference, obtained from initiate_multipart_upload
309              
310             returns nothing. throws an exception, if something is wrong.
311              
312             =cut
313              
314             sub complete_multipart_upload {
315 1     1 1 9 my ($self, $multipart_upload) = @_;
316              
317             $multipart_upload->{object}->complete_multipart_upload(
318             upload_id => $multipart_upload->{upload_id},
319             etags => $multipart_upload->{etags},
320             part_numbers => $multipart_upload->{parts}
321 1         8 );
322             }
323              
324             =head2 download_with_range
325              
326             Downloads an object with the HTTP Range header (ie, part of the data).
327              
328             Parameters:
329              
330             1) $self
331              
332             2) $key - object name
333              
334             3) $first - first byte for Range
335              
336             4) $last - last byte for Range
337              
338             If $first, $last are missing or undef, the entire file is downloaded, without the Range header
339              
340             If $last is missing, downloads data from a specific position to the end (as well as in the Range specification).
341              
342             If the object is missing, returns an empty list. If other error - an exception.
343              
344             Returns:
345              
346             1) Scalar Ref on downloaded data
347              
348             2) The number of remaining bytes that can still be downloaded (or undef, if $first parameter was not present)
349              
350             3) ETag header with deleted quotes (or undef if it is missing)
351              
352             4) X-Amz-Meta-Md5 header (or undef if it is missing)
353              
354             =cut
355              
356             sub download_with_range {
357 5     5 1 213 my ($self, $key, $first, $last) = @_;
358              
359 5 50       21 confess "Missing bucket" unless $self->{bucket};
360              
361             # TODO: How and when to validate ETag here?
362             my $http_request = Net::Amazon::S3::Request::GetObject->new(
363             s3 => $self->{client}->s3,
364             bucket => $self->{bucket},
365 5         160 key => $key,
366             method => 'GET',
367             )->http_request;
368              
369 5 100       18163 if (defined $first) {
370 1   50     47 $last //= '';
371 1         5 $http_request->headers->header("Range", "bytes=$first-$last");
372             }
373              
374 5         340 my $http_response = $self->{client}->_send_request_raw($http_request);
375             #print $http_request->as_string, $http_response->as_string ;
376 5 100 100     27229 if ( $http_response->code == 404 && $http_response->decoded_content =~ m!<Code>NoSuchKey</Code>!) {
    100          
377 1         2982 return;
378             }
379             elsif (is_error($http_response->code)) {
380 1         615 my ($err) = $http_response->decoded_content =~ m!<Code>(.*)</Code>!;
381 1   50     493 $err //= 'none';
382 1         5 confess "Unknown error ".$http_response->code." $err";
383             } else {
384 3         80 my $left = undef;
385 3 100       12 if (defined $first) {
386 1   33     4 my $range = $http_response->header('Content-Range') // confess;
387 1 50       54 my ($f, $l, $total) = $range =~ m!bytes (\d+)\-(\d+)/(\d+)! or confess;
388 1         4 $left = $total - ( $l + 1);
389             }
390              
391 3         9 my $etag = $http_response->header('ETag');
392 3 50       151 if ($etag) {
393 3         14 $etag =~ s/^"//;
394 3         12 $etag =~ s/"$//;
395             }
396              
397 3         11 my $custom_md5 = $http_response->header('X-Amz-Meta-Md5');
398              
399 3         215 return (\$http_response->decoded_content, $left, $etag, $custom_md5);
400             }
401             }
402              
403             =head2 size
404              
405             Gets the size of the object using the HTTP HEAD request.
406              
407             Parameters:
408              
409             1) $self
410              
411             2) $key - object name
412              
413             If the object does not exist, it returns undef. If other error - an exception. Returns size in bytes.
414              
415             =cut
416              
417             sub size {
418 4     4 1 162 my ($self, $key) = @_;
419              
420 4 50       21 confess "Missing bucket" unless $self->{bucket};
421              
422             my $http_request = Net::Amazon::S3::Request::GetObject->new(
423             s3 => $self->{client}->s3,
424             bucket => $self->{bucket},
425 4         128 key => $key,
426             method => 'HEAD',
427             )->http_request;
428              
429 4         13922 my $http_response = $self->{client}->_send_request_raw($http_request);
430 4 100       21317 if ( $http_response->code == 404) { # It's not possible to distinct between NoSuchkey and NoSuchBucket??
    100          
431 1         23 return undef;
432             }
433             elsif (is_error($http_response->code)) {
434 1         25 confess "Unknown error ".$http_response->code;
435             }
436             else {
437 2   100     51 return $http_response->header('Content-Length') // 0;
438             }
439              
440              
441              
442             }
443              
444             =head2 delete
445              
446             Deletes an object
447              
448             Parameters:
449              
450             1) $self
451              
452             2) $key - object name
453              
454             Returns nothing. If the object did not exist, does not signal about it.
455              
456             =cut
457              
458             sub delete {
459 1     1 1 46 my ($self, $key) = @_;
460              
461 1         6 $self->_request_object->object( key => $key )->delete;
462             }
463              
464             =head2 query_string_authentication_uri
465              
466             Returns Query String Authentication URL for key $key, with expire time $expires
467              
468             =cut
469              
470             sub query_string_authentication_uri {
471 2     2 1 18 my ($self, $key, $expires) = @_;
472              
473 2         8 $self->_request_object->object( key => $key, expires => $expires )->query_string_authentication_uri;
474             }
475              
476              
477             1;