File Coverage

lib/WWW/Curl/UserAgent.pm
Criterion Covered Total %
statement 1 3 33.3
branch n/a
condition n/a
subroutine 1 1 100.0
pod n/a
total 2 4 50.0


line stmt bran cond sub pod time code
1             package WWW::Curl::UserAgent;
2             {
3             $WWW::Curl::UserAgent::VERSION = '0.9.6';
4             }
5              
6             # ABSTRACT: UserAgent based on libcurl
7              
8 1     1   31071 use Moose;
  0            
  0            
9             use v5.10;
10              
11             use WWW::Curl::Easy;
12             use WWW::Curl::Multi;
13             use HTTP::Request;
14             use HTTP::Response;
15             use Time::HiRes;
16             use IO::Select;
17              
18             use WWW::Curl::UserAgent::Handler;
19             use WWW::Curl::UserAgent::Request;
20              
21             # timeout in milliseconds
22             has timeout => (
23             is => 'rw',
24             isa => 'Int',
25             default => 0,
26             );
27              
28             # connection timeout in milliseconds
29             has connect_timeout => (
30             is => 'rw',
31             isa => 'Int',
32             default => 300,
33             );
34              
35             # maximum of requests done in parallel
36             has parallel_requests => (
37             is => 'rw',
38             isa => 'Int',
39             default => 5,
40             );
41              
42             # use connection keep-alive
43             has keep_alive => (
44             is => 'rw',
45             isa => 'Bool',
46             default => 1,
47             );
48              
49             # follow redirects
50             has followlocation => (
51             is => 'ro',
52             isa => 'Bool',
53             default => 0,
54             );
55              
56             # maximum number of redirects
57             has max_redirects => (
58             is => 'ro',
59             isa => 'Int',
60             default => -1,
61             );
62              
63             # identifier in each request
64             has user_agent_string => (
65             is => 'rw',
66             isa => 'Str',
67             default => sub { "www.curl.useragent/$WWW::Curl::UserAgent::VERSION" },
68             );
69              
70             has _curl_multi => (
71             is => 'ro',
72             isa => 'WWW::Curl::Multi',
73             default => sub { WWW::Curl::Multi->new },
74             );
75              
76             has _handler_queue => (
77             is => 'ro',
78             isa => 'ArrayRef[WWW::Curl::UserAgent::Handler]',
79             default => sub { [] },
80             traits => ['Array'],
81             handles => {
82             add_handler => 'push',
83             _get_handler_from_queue => 'shift',
84             _has_handler_in_queue => 'count',
85             request_queue_size => 'count',
86             }
87             );
88              
89             has _active_handler_map => (
90             is => 'ro',
91             isa => 'HashRef[WWW::Curl::UserAgent::Handler]',
92             default => sub { {} },
93             traits => ['Hash'],
94             handles => {
95             _active_handlers => 'count',
96             _set_active_handler => 'set',
97             _get_active_handler => 'delete',
98             }
99             );
100              
101             has _max_private_id => (
102             is => 'ro',
103             isa => 'Num',
104             default => 1,
105             traits => ['Counter'],
106             handles => { _inc_private_id => 'inc', }
107             );
108              
109             sub request {
110             my ( $self, $request, %args ) = @_;
111              
112             my $timeout = $args{timeout} // $self->timeout;
113             my $connect_timeout = $args{connect_timeout} // $self->connect_timeout;
114             my $keep_alive = $args{keep_alive} // $self->keep_alive;
115             my $followlocation = $args{followlocation} // $self->followlocation;
116             my $max_redirects = $args{max_redirects} // $self->max_redirects;
117              
118             my $response;
119             $self->add_handler(
120             WWW::Curl::UserAgent::Handler->new(
121             on_success => sub {
122             my ( $req, $res ) = @_;
123             $response = $res;
124             },
125             on_failure => sub {
126             my ( $req, $msg, $desc ) = @_;
127             $response = HTTP::Response->new( 500, $msg, [], $desc );
128             },
129             request => WWW::Curl::UserAgent::Request->new(
130             http_request => $request,
131             connect_timeout => $connect_timeout,
132             timeout => $timeout,
133             keep_alive => $keep_alive,
134             followlocation => $followlocation,
135             max_redirects => $max_redirects,
136             ),
137             )
138             );
139             $self->perform;
140              
141             return $response;
142             }
143              
144             sub add_request {
145             my ( $self, %args ) = @_;
146              
147             my $on_success = $args{on_success};
148             my $on_failure = $args{on_failure};
149             my $request = $args{request};
150             my $timeout = $args{timeout} // $self->timeout;
151             my $connect_timeout = $args{connect_timeout} // $self->connect_timeout;
152             my $keep_alive = $args{keep_alive} // $self->keep_alive;
153             my $followlocation = $args{followlocation} // $self->followlocation;
154             my $max_redirects = $args{max_redirects} // $self->max_redirects;
155              
156             my $handler = WWW::Curl::UserAgent::Handler->new(
157             on_success => $on_success,
158             on_failure => $on_failure,
159             request => WWW::Curl::UserAgent::Request->new(
160             http_request => $request,
161             connect_timeout => $connect_timeout,
162             timeout => $timeout,
163             keep_alive => $keep_alive,
164             followlocation => $followlocation,
165             max_redirects => $max_redirects,
166             ),
167             );
168             $self->add_handler($handler);
169              
170             return $handler;
171             }
172              
173             sub perform {
174             my $self = shift;
175              
176             my $active_handlers;
177              
178             # activate handlers by draining the queue
179             while ( $active_handlers = $self->_drain_handler_queue ) {
180              
181             # loop until there is a response available
182             $self->_wait_for_response($active_handlers);
183              
184             # execute callbacks for all received responses
185             $self->_perform_callbacks;
186             }
187             }
188              
189             sub _wait_for_response {
190             my $self = shift;
191             my $active_handlers = shift;
192              
193             my $curl_multi = $self->_curl_multi;
194              
195             while ( $curl_multi->perform == $active_handlers ) {
196             Time::HiRes::nanosleep(1);
197             my @select = map {
198             my $s = IO::Select->new;
199             $s->add( @{$_} );
200             $s;
201             } ( $curl_multi->fdset );
202             IO::Select->select( @select, 0.1 );
203             }
204             }
205              
206             sub _perform_callbacks {
207             my $self = shift;
208              
209             while ( my ( $active_transfer_id, $return_code ) = $self->_curl_multi->info_read ) {
210              
211             unless ($active_transfer_id) {
212             Time::HiRes::nanosleep(1); # do not eat the whole cpu
213             next;
214             }
215              
216             my $handler = $self->_get_active_handler($active_transfer_id);
217             my $request = $handler->request;
218             my $curl_easy = $request->curl_easy;
219              
220             if ( $return_code == 0 ) {
221              
222             # Assume the final http request is the original one
223             my $final_http_request = $request->http_request();
224             my $effective_url = $curl_easy->getinfo( CURLINFO_EFFECTIVE_URL );
225              
226             # Handle redirection last effective Request so
227             # the response is always link to the last request in effect.
228             if( $effective_url && ( $final_http_request->uri().'' ne $effective_url ) ){
229             $final_http_request = HTTP::Request
230             ->new($final_http_request->method,
231             $effective_url,
232             $final_http_request->headers(),
233             $final_http_request->content());
234             }
235              
236             my $response = $self->_build_http_response( ${ $request->header_ref }, ${ $request->content_ref }, $final_http_request );
237             $handler->on_success->( $request->http_request, $response, $curl_easy );
238             }
239             else {
240             $handler->on_failure->(
241             $request->http_request, $curl_easy->strerror($return_code),
242             $curl_easy->errbuf, $curl_easy
243             );
244             }
245             }
246             }
247              
248             sub _drain_handler_queue {
249             my $self = shift;
250              
251             while ( $self->_has_handler_in_queue && $self->_active_handlers < $self->parallel_requests ) {
252             $self->_activate_handler( $self->_get_handler_from_queue );
253             }
254              
255             return $self->_active_handlers;
256             }
257              
258             sub _activate_handler {
259             my $self = shift;
260             my $handler = shift;
261              
262             # set up curl easy
263             $self->_inc_private_id;
264             my $private_id = $self->_max_private_id;
265             my $easy = $handler->request->curl_easy;
266             $easy->setopt( CURLOPT_PRIVATE, $private_id );
267             $easy->setopt( CURLOPT_USERAGENT, $self->user_agent_string );
268              
269             # reference the handler on its handler id (CURLOPT_PRIVATE)
270             $self->_set_active_handler( $private_id => $handler );
271              
272             # finally add the curl easy to curl multi
273             $self->_curl_multi->add_handle($easy);
274             }
275              
276             sub _build_http_response {
277             my $self = shift;
278             my $header = shift;
279             my $content = shift;
280             my $final_http_request = shift;
281              
282             # PUT requests may contain continue header
283             my @header = split "\r\n\r\n", $header;
284              
285             my $response = HTTP::Response->parse($header[-1]);
286              
287             if( $final_http_request ){
288             # Inject this in the response to behave more like LWP::UserAgent
289             $response->request($final_http_request);
290             }
291              
292             $response->content($content) if defined $content;
293              
294             # message might include a bad char
295             my $message = $response->message;
296             $response->message($message)
297             if $message =~ s/\r//g;
298              
299             return $response;
300             }
301              
302             no Moose;
303             __PACKAGE__->meta->make_immutable;
304             1;
305              
306             __END__
307              
308             =pod
309              
310             =head1 NAME
311              
312             WWW::Curl::UserAgent - UserAgent based on libcurl
313              
314             =head1 VERSION
315              
316             version 0.9.6
317              
318             =head1 SYNOPSIS
319              
320             use HTTP::Request;
321             use WWW::Curl::UserAgent;
322              
323             my $ua = WWW::Curl::UserAgent->new(
324             timeout => 10000,
325             connect_timeout => 1000,
326             );
327              
328             $ua->add_request(
329             request => HTTP::Request->new( GET => 'http://search.cpan.org/' ),
330             on_success => sub {
331             my ( $request, $response ) = @_;
332             if ($response->is_success) {
333             print $response->content;
334             }
335             else {
336             die $response->status_line;
337             }
338             },
339             on_failure => sub {
340             my ( $request, $error_msg, $error_desc ) = @_;
341             die "$error_msg: $error_desc";
342             },
343             );
344             $ua->perform;
345              
346             =head1 DESCRIPTION
347              
348             C<WWW::Curl::UserAgent> is a web user agent based on libcurl. It can be used
349             easily with C<HTTP::Request> and C<HTTP::Response> objects and handler
350             callbacks. For an easier interface there is also a method to map a single
351             request to a response.
352              
353             C<WWW::Curl> is used for the power of libcurl, which e.g. handles connection
354             keep-alive, parallel requests, asynchronous callbacks and much more. This
355             package was written, because C<WWW::Curl::Simple> does not handle keep-alive
356             correctly and also does not consider PUT, HEAD and other request methods like
357             DELETE.
358              
359             There is a simpler interface too, which just returns a C<HTTP::Response> for a
360             given C<HTTP::Request>, named request(). The normal approach to use this
361             library is to add as many requests with callbacks as your code allows to do and
362             run C<perform> afterwards. Then the callbacks will be executed sequentially
363             when the responses arrive beginning with the first received response. The
364             simple method request() does not support this of course, because there are no
365             callbacks defined.
366              
367             This library is in production use on L<https://www.xing.com>.
368              
369             =head1 CONSTRUCTOR METHODS
370              
371             The following constructor methods are available:
372              
373             =over 4
374              
375             =item $ua = WWW::Curl::UserAgent->new( %options )
376              
377             This method constructs a new C<WWW::Curl::UserAgent> object and returns it.
378             Key/value pair arguments may be provided to set up the initial state.
379             The default values should be based on the default values of libcurl.
380             The following options correspond to attribute methods described below:
381              
382             KEY DEFAULT
383             ----------- --------------------
384             user_agent_string www.curl.useragent/$VERSION
385             connect_timeout 300
386             timeout 0
387             parallel_requests 5
388             keep_alive 1
389             followlocation 0
390             max_redirects -1
391              
392             =back
393              
394             =head1 ATTRIBUTES
395              
396             =over
397              
398             =item $ua->connect_timeout / $ua->connect_timeout($connect_timeout)
399              
400             Get/set the timeout in milliseconds waiting for the response to be received. If the
401             response is not received within the timeout the on_failure handler is called.
402              
403             =item $ua->timeout / $ua->timeout($timeout)
404              
405             Get/set the timeout in milliseconds waiting for the response to be received. If the
406             response is not received within the timeout the on_failure handler is called.
407              
408             =item $ua->parallel_requests / $ua->parallel_requests($parallel_requests)
409              
410             Get/set the number of the maximum of requests performed in parallel. libcurl
411             itself may use less requests than this number but not more.
412              
413             =item $ua->keep_alive / $ua->keep_alive($boolean)
414              
415             Get/set if TCP connections should be reused with keep-alive. Therefor the
416             TCP connection is forced to be closed after receiving the response and the
417             corresponding header "Connection: close" is set. If keep-alive is enabled
418             (default) libcurl will handle the connections.
419              
420             =item $ua->followlocation / $ua->followlocation($boolean)
421              
422             Get/set if curl should follow redirects. The headers of the redirect respones
423             are thrown away while redirecting, so that the final response will be passed
424             into the corresponding handler.
425              
426             =item $ua->max_redirects / $ua->max_redirects($max_redirects)
427              
428             Get/set the maximum amount of redirects. -1 (default) means infinite redirects.
429             0 means no redirects at all. If the maximum redirect is reached the on_failure
430             handler will be called.
431              
432             =item $ua->user_agent_string / $ua->user_agent_string($user_agent)
433              
434             Get/set the user agent submitted in each request.
435              
436             =item $ua->request_queue_size
437              
438             Get the size of the not performed requests.
439              
440             =item $ua->request( $request, %args )
441              
442             Perform immediately a single C<HTTP::Request>. Parameters can be submitted
443             optionally, which will override the user agents settings for this single
444             request. Possible options are:
445              
446             connect_timeout
447             timeout
448             keep_alive
449             followlocation
450             max_redirects
451              
452             Some examples for a request
453              
454             my $request = HTTP::Request->new( GET => 'http://search.cpan.org/');
455              
456             $response = $ua->request($request);
457             $response = $ua->request($request,
458             timeout => 3000,
459             keep_alive => 0,
460             );
461              
462             If there is an error e.g. like a timeout the corresponding C<HTTP::Response>
463             object will have the statuscode 500, the short error description as message
464             and a longer message description as content. It runs perform() internally, so
465             queued requests will be performed, too.
466              
467             =item $ua->add_request(%args)
468              
469             Adds a request with some callback handler on receiving messages. The on_success
470             callback will be called for every successful read response, even those
471             containing error codes. The on_failure handler will be called when libcurl
472             reports errors, e.g. timeouts or bad curl settings. The parameters
473             C<request>, C<on_success> and C<on_failure> are mandatory. Optional are
474             C<timeout>, C<connect_timeout>, C<keep_alive>, C<followlocation> and
475             C<max_redirects>.
476              
477             $ua->add_request(
478             request => HTTP::Request->new( GET => 'http://search.cpan.org/'),
479             on_success => sub {
480             my ( $request, $response, $easy ) = @_;
481             print $request->as_string;
482             print $response->as_string;
483             },
484             on_failure => sub {
485             my ( $request, $err_msg, $err_desc, $easy ) = @_;
486             # error handling
487             }
488             );
489              
490             The callbacks provide as last parameter a C<WWW:Curl::Easy> object which was
491             used to perform the request. This can be used to obtain some informations like
492             statistical data about the request.
493              
494             Chaining of C<add_request> calls is a feature of this module. If you add a
495             request within an C<on_success> handler it will be immediately executed when
496             the callback is executed. This can be useful to immediately react on a
497             response:
498              
499             $ua->add_request(
500             request => HTTP::Request->new( POST => 'http://search.cpan.org/', [], $form ),
501             on_failure => sub { die },
502             on_success => sub {
503             my ( $request, $response ) = @_;
504              
505             my $target_url = get_target_from($response);
506             $ua->add_request(
507             request => HTTP::Request->new( GET => $target_url ),
508             on_failure => sub { die },
509             on_success => sub {
510             my ( $request, $response ) = @_;
511             # actually do sth.
512             }
513             );
514             },
515             );
516             $ua->perform; # executes both requests
517              
518             =item $ua->add_handler($handler)
519              
520             To have more control over the handler you can add a C<WWW::Curl::UserAgent::Handler>
521             by yourself. The C<WWW::Curl::UserAgent::Request> inside of the handler needs
522             all parameters provided to libcurl as mandatory to prevent defining duplicates of
523             default values. Within the C<WWW::Curl::UserAgent::Request> is the possiblity to
524             modify the C<WWW::Curl::Easy> object before it gets performed.
525              
526             my $handler = WWW::Curl::UserAgent::Handler->new(
527             on_success => sub {
528             my ( $request, $response, $easy ) = @_;
529             print $request->as_string;
530             print $response->as_string;
531             },
532             on_failure => sub {
533             my ( $request, $err_msg, $err_desc, $easy ) = @_;
534             # error handling
535             }
536             request => WWW::Curl::UserAgent::Request->new(
537             http_request => HTTP::Request->new( GET => 'http://search.cpan.org/'),
538             connect_timeout => $ua->connect_timeout,
539             timeout => $ua->timeout,
540             keep_alive => $ua->keep_alive,
541             followlocation => $ua->followlocation,
542             max_redirects => $ua->max_redirects,
543             ),
544             );
545              
546             $handler->request->curl_easy->setopt( ... );
547              
548             $ua->add_handler($handler);
549              
550             =item $ua->perform
551              
552             Perform all queued requests. This method will return after all responses have
553             been received and handler have been processed.
554              
555             =back
556              
557             =head1 BENCHMARK
558              
559             A test with the tools/benchmark.pl script against loadbalanced webserver
560             performing a get requests to a simple echo API on an Intel i5 M 520 with
561             Fedora 18 gave the following results:
562              
563             500 requests (sequentially, 500 iterations):
564             +--------------------------+-----------+------+------+------------+------------+
565             | User Agent | Wallclock | CPU | CPU | Requests | Iterations |
566             | | seconds | usr | sys | per second | per second |
567             +--------------------------+-----------+------+------+------------+------------+
568             | LWP::Parallel::UserAgent | 14 | 0.91 | 0.30 | 35.7 | 413.2 |
569             +--------------------------+-----------+------+------+------------+------------+
570             | LWP::UserAgent | 15 | 1.00 | 0.30 | 33.3 | 384.6 |
571             +--------------------------+-----------+------+------+------------+------------+
572             | WWW::Curl::Simple | 15 | 0.68 | 0.35 | 33.3 | 485.4 |
573             +--------------------------+-----------+------+------+------------+------------+
574             | WWW::Curl::UserAgent | 8 | 0.52 | 0.06 | 62.5 | 862.1 |
575             +--------------------------+-----------+------+------+------------+------------+
576              
577             500 requests (5 in parallel, 100 iterations):
578             +--------------------------+-----------+-------+-------+------------+------------+
579             | User Agent | Wallclock | CPU | CPU | Requests | Iterations |
580             | | seconds | usr | sys | per second | per second |
581             +--------------------------+-----------+-------+-------+------------+------------+
582             | LWP::Parallel::UserAgent | 9 | 1.37 | 0.34 | 55.6 | 58.5 |
583             +--------------------------+-----------+-------+-------+------------+------------+
584             | WWW::Curl::Simple | 135 | 57.61 | 19.85 | 3.7 | 1.3 |
585             +--------------------------+-----------+-------+-------+------------+------------+
586             | WWW::Curl::UserAgent | 2 | 0.40 | 0.09 | 250.0 | 204.1 |
587             +--------------------------+-----------+-------+-------+------------+------------+
588              
589             =head1 SEE ALSO
590              
591             See L<HTTP::Request> and L<HTTP::Response> for a description of the
592             message objects dispatched and received. See L<HTTP::Request::Common>
593             and L<HTML::Form> for other ways to build request objects.
594              
595             See L<WWW::Curl> for a description of the settings and options possible
596             on libcurl.
597              
598             =head1 AUTHORS
599              
600             =over 4
601              
602             =item *
603              
604             Julian Knocke
605              
606             =item *
607              
608             Othello Maurer
609              
610             =back
611              
612             =head1 COPYRIGHT AND LICENSE
613              
614             This software is copyright (c) 2013 by XING AG.
615              
616             This is free software; you can redistribute it and/or modify it under
617             the same terms as the Perl 5 programming language system itself.
618              
619             =cut