File Coverage

blib/lib/Bio/Das/Request.pm
Criterion Covered Total %
statement 127 164 77.4
branch 30 70 42.8
condition 5 14 35.7
subroutine 37 41 90.2
pod 28 29 96.5
total 227 318 71.3


line stmt bran cond sub pod time code
1             package Bio::Das::Request;
2             # encapsulates a request on a DAS server
3             # also knows how to deal with response
4             # $Id: Request.pm,v 1.12 2004/01/03 00:23:40 lstein Exp $
5              
6             =head1 NAME
7              
8             Bio::Das::Request - Base class for a request on a DAS server
9              
10             =head1 SYNOPSIS
11              
12             my $dsn = $request->dsn;
13             my $das_command = $request->command;
14             my $successful = $request->is_success;
15             my $error_msg = $request->error;
16             my @results = $request->results;
17             my ($username,$password) = $request->auth;
18              
19             =head1 DESCRIPTION
20              
21             Each type of request on a DAS server (e.g. an entry_points request) is
22             a subclass of Bio::Das::Request. The request encapsulates the
23             essential information on the request: the server, the data source, and
24             the command that will be executed. After the request is sent to the
25             server, the request object will contain information pertinent to the
26             outcome of the request, including the success status, the results if
27             successful, and an error message if not successful.
28              
29             Subclasses of Bio::Das::Request include L<Bio::Das::Request::Dsn>,
30             L<Bio::Das::Request::Entry_points>, L<Bio::Das::Request::Features>,
31             L<Bio::Das::Request::Stylesheet>, and L<Bio::Das::Request::Types>.
32              
33             Creating the appropriate request is done automatically by L<Bio::Das>.
34             Ordinarily you will not have to create a Bio::Das::Request manually.
35              
36             =head2 METHODS
37              
38             Following is a complete list of methods implemented by
39             Bio::Das::Request.
40              
41             =over 4
42              
43             =cut
44              
45 1     1   5 use strict;
  1         1  
  1         29  
46              
47 1     1   4 use Bio::Das::Util;
  1         2  
  1         35  
48 1     1   11240 use HTML::Parser;
  1         19566  
  1         71  
49 1     1   2446 use Compress::Zlib;
  1         86339  
  1         272  
50 1     1   8 use Carp qw/croak confess/;
  1         2  
  1         52  
51              
52 1     1   5 use constant GZIP_MAGIC => 0x1f8b;
  1         3  
  1         64  
53 1     1   4 use constant OS_MAGIC => 0x03;
  1         2  
  1         44  
54 1     1   5 use constant DASVERSION => 0.95;
  1         1  
  1         40  
55              
56 1     1   4 use overload '""' => 'url';
  1         2  
  1         9  
57              
58             my %DAS_error_codes = (
59             200=>'OK, data follows',
60             400=>'Bad command',
61             401=>'Bad data source',
62             402=>'Bad command arguments',
63             403=>'Bad reference object',
64             404=>'Bad stylesheet',
65             405=>'Coordinate error',
66             500=>'Server error',
67             501=>'Unimplemented feature',
68             );
69              
70             =item $request = Bio::Das::Request->new(-dsn=>$dsn,-args=>$args,-callback=>$callback)
71              
72             Create a new Bio::Das::Request objects. The B<-dsn> argument points
73             to the DAS DSN (full form, including hostname). B<-callback> points
74             to an optional coderef that will be invoked for every object returned
75             during execution of the request. B<-args> points to a hashref
76             containing request-specific arguments.
77              
78             This method is trivially overridden by many of the request subclasses
79             in order to accept arguments that are specific to each of the
80             requests, such as -segments.
81              
82             =cut
83              
84             # -dsn dsn object
85             # -args e.g. { segment => [qw(ZK154 M7 CHROMOSOME_I:1000000,2000000)] }
86             # -callback code ref to be invoked when each "object" is finished parsing
87             sub new {
88 6     6 1 16 my $package = shift;
89 6         36 my ($dsn,$args,$callback) = rearrange(['dsn',
90             'args',
91             'callback'
92             ],@_);
93 6 50       60 $dsn = Bio::Das::DSN->new($dsn) unless ref $dsn;
94 6   100     23 $args ||= {};
95 6         71 return bless {
96             dsn => $dsn,
97             args => $args,
98             callback => $callback,
99             results => [], # list of objects to return
100             p_success => 0,
101             p_error => '',
102             p_compressed_stream => 0,
103             p_xml_parser => undef,
104             },$package;
105             }
106              
107             =item $command = $request->command
108              
109             The command() method returns the DAS command that will be invoked.
110             This varies from subclass to subclass. For example,
111             Bio::Das::Request::Types->command() will return "types."
112              
113             =cut
114              
115             # == to be overridden in subclasses ==
116             # provide the command name (e.g. 'types')
117             sub command {
118 0     0 1 0 my $self = shift;
119 0         0 die "command() must be implemented in subclass";
120             }
121              
122             =item $url = $request->url
123              
124             Return the URL for the request on the DAS server.
125              
126             =cut
127              
128             # == Generate the URL request ==
129             sub url {
130 271     271 1 493 my $self = shift;
131 271         756 my $url = $self->dsn->url;
132 271         922 my $command = $self->command;
133              
134 271 50       598 if (defined $command) {
135 271         621 $url .= "/$command";
136             }
137              
138 271         880 $url;
139             }
140              
141             =item $dsn = $request->dsn([$new_dsn])
142              
143             Get the DAS DSN associated with the request. This method is also used
144             internally to change the DSN.
145              
146             =cut
147              
148             # get/set the DSN
149             sub dsn {
150 296     296 1 439 my $self = shift;
151 296         492 my $d = $self->{dsn};
152 296 50       654 $self->{dsn} = shift if @_;
153 296         4435 $d;
154             }
155              
156             =item $host = $request->host
157              
158             Returns the host associated with the request. This is simply
159             delegated to the DSN object's host() method.
160              
161             =cut
162              
163 6     6 1 28 sub host { shift->dsn->host }
164              
165             # == status ==
166              
167             =item $flag = $request->is_success
168              
169             After the request is executed, is_success() will return true if the
170             request was successfully issued and parsed, false otherwise. If
171             false, you can retrieve an informative error message using the error()
172             method.
173              
174             =cut
175              
176             # after the request is finished, is_success() will return true if successful
177 7     7 1 645 sub is_success { shift->success; }
178              
179             =item $message = $request->error
180              
181             If the request was unsuccessful, error() will return an error message.
182             In the case of a successful request, the result of error() is
183             undefined and should not be relied on.
184              
185             Error messages have the format "NNN XXXXXXXX" where "NNN" is a numeric
186             status code, and XXXXXXX is a human-readable error message. The
187             following error messages are possible:
188              
189             400 Bad command
190             401 Bad data source
191             402 Bad command arguments
192             403 Bad reference object
193             404 Bad stylesheet
194             405 Coordinate error
195             410 Unknown host
196             411 Couldn't connect
197             412 Communications error
198             413 Authentication scheme 'xxxx" is not supported
199             500 Server error
200             501 Unimplemented feature
201             502 No X-Das-Version header
202             503 Invalid X-Das-Version header
203             504 DAS server is too old
204             505 No X-Das-Status header
205             506 Data decompression failure
206              
207             =cut
208              
209             # error() will give the most recent error message
210             sub error {
211 0     0 1 0 my $self = shift;
212 0 0       0 if (@_) {
213 0         0 $self->{p_error} = shift;
214 0         0 return;
215             } else {
216 0         0 return $self->{p_error};
217             }
218             }
219              
220             =item @results = $request->results
221              
222             In a list context this method returns the accumulated results from the
223             DAS request. The contents of the results list is dependent on the
224             particular request, and you should consult each of the subclasses to
225             see what exactly is returned.
226              
227             In a scalar context, this method will return an array reference.
228              
229             =cut
230              
231             sub results {
232 7     7 1 9 my $self = shift;
233 7 50       23 my $r = $self->{results} or return;
234 7 50       213 return wantarray ? @$r : $r;
235             }
236              
237             =item ($username,$password) = $request->auth([$username,$password])
238              
239             Get or set the username and password that will be used for
240             authentication in this request. This is used internally by the
241             L<Bio::Das::HTTP::Fetch> class and should not ordinarily be
242             manipulated by application code.
243              
244             =cut
245              
246             sub auth {
247 6     6 1 10 my $self = shift;
248 6         8 my ($username,$password) = @_;
249 6 50       13 if ($username) {
250 0         0 $self->{auth} = [$username,$password];
251             }
252 6 50       36 return unless $self->{auth};
253 0         0 return @{$self->{auth}};
  0         0  
254             }
255              
256             =item $parser = $request->create_parser()
257              
258             This method creates an HTML::Parser object that will be used to parse
259             the incoming XML data. Ordinarily this will not be called by
260             application code.
261              
262             =cut
263              
264             # create an initiliazed HTML::Parser object
265             sub create_parser {
266 6     6 1 12 my $self = shift;
267             return HTML::Parser->new(
268             api_version => 3,
269 12645     12645   24723 start_h => [ sub { $self->tag_starts(@_) },'tagname,attr' ],
270 12645     12645   26381 end_h => [ sub { $self->tag_stops(@_) },'tagname' ],
271 6     24710   199 text_h => [ sub { $self->char_data(@_) },'dtext' ],
  24710         50916  
272             );
273             }
274              
275             =item $request->tag_starts
276              
277             This method is called internally during the parse to handle a start
278             tag. It should not be called by application code.
279              
280             =cut
281              
282             # tags will be handled by a method named t_TAGNAME
283             sub tag_starts {
284 12645     12645 1 13489 my $self = shift;
285 12645         19366 my ($tag,$attrs) = @_;
286 12645         15424 my $method = "t_$tag";
287 12645         15579 $self->{char_data} = ''; # clear char data
288 12645 100       50725 $self->can($method)
289             ? $self->$method($attrs)
290             : $self->do_tag($tag,$attrs);
291             }
292              
293             =item $request->tag_stops
294              
295             This method is called internally during the parse to handle a stop
296             tag. It should not be called by application code.
297              
298             =cut
299              
300             # tags will be handled by a method named t_TAGNAME
301             sub tag_stops {
302 12645     12645 1 13061 my $self = shift;
303 12645         13786 my $tag = shift;
304 12645         15290 my $method = "t_$tag";
305 12645 100       49184 $self->can($method)
306             ? $self->$method()
307             : $self->do_tag($tag);
308             }
309              
310             =item $request->do_tag
311              
312             This method is called internally during the parse to handle a tag. It
313             should not be called by application code, but can be overridden by a
314             subclass to provide tag-specific processing.
315              
316             =cut
317              
318             sub do_tag {
319 0     0 1 0 my $self = shift;
320 0         0 my ($tag,$attrs) = @_;
321             # do nothing
322             }
323              
324              
325             =item $request->char_data
326              
327             This method is called internally during the parse to handle character
328             data. It should not be called by application code.
329              
330             =cut
331              
332             sub char_data {
333 32820     32820 1 35361 my $self = shift;
334 32820 100 66     119442 if (@_ && length(my $text = shift)>0) {
335 24710         133571 $self->{char_data} .= $text;
336             } else {
337 8110         18667 $self->trim($self->{char_data});
338             }
339             }
340              
341             =item $request->cleanup
342              
343             This method is called internally at the end of the parse to handle any
344             cleanup that is needed. The default behavior is to do nothing, but it
345             can be overridden by a subclass to provide more sophisticated
346             processing.
347              
348             =cut
349              
350             sub cleanup {
351 4     4 1 7 my $self = shift;
352             }
353              
354             =item $request->clear_results
355              
356             This method is called internally at the start of the parse to clear
357             any accumulated results and to get ready for a new parse.
358              
359             =cut
360              
361             sub clear_results {
362 6     6 1 24 shift->{results} = [];
363             }
364              
365             =item $request->add_objects(@objects)
366              
367             This method is called internally during the parse to add one or more
368             objects (e.g. a Bio::Das::Feature) to the results list.
369              
370             =cut
371              
372             # add one or more objects to our results list
373             sub add_object {
374 19     19 0 21 my $self = shift;
375 19 50       57 if (my $cb = $self->callback) {
376 0         0 eval {$cb->(@_)};
  0         0  
377 0 0       0 warn $@ if $@;
378             } else {
379 19         18 push @{$self->{results}},@_;
  19         103  
380             }
381             }
382              
383             # == ACCESSORS ==
384              
385             =item $parser = $request->xml_parser([$new_parser])
386              
387             Internal accessor for getting or setting the XML parser object used in
388             processing the request.
389              
390             =cut
391              
392             # get/set the HTML::Parser object
393             sub xml_parser {
394 138     138 1 776 my $self = shift;
395 138         223 my $d = $self->{p_xml_parser};
396 138 100       311 $self->{p_xml_parser} = shift if @_;
397 138         410 $d;
398             }
399              
400             =item $flag = $request->compressed([$new_flag])
401              
402             Internal accessor for getting or setting the compressed data stream
403             flag. This is true when processing a compressed data stream, such as
404             GZIP compression.
405              
406             =cut
407              
408             # get/set stream compression flag
409             sub compressed {
410 114     114 1 191 my $self = shift;
411 114         195 my $d = $self->{p_compressed_stream};
412 114 50       242 $self->{p_compressed_stream} = shift if @_;
413 114         285 $d;
414             }
415              
416             =item $flag = $request->success([$new_flag])
417              
418             Internal accessor for getting or setting the success flag. This is
419             the read/write version of is_success(), and should not be used by
420             application code.
421              
422             =cut
423              
424             # get/set success flag
425             sub success {
426 13     13 1 19 my $self = shift;
427 13         21 my $d = $self->{p_success};
428 13 100       35 $self->{p_success} = shift if @_;
429 13         41 $d;
430             }
431              
432              
433             =item $callback = $request->callback([$new_callback])
434              
435             Internal accessor for getting or setting the callback code that will
436             be used to process objects as they are generated by the parse.
437              
438             =cut
439              
440             # get/set callback
441             sub callback {
442 1516     1516 1 1945 my $self = shift;
443 1516         1978 my $d = $self->{callback};
444 1516 50       2732 $self->{callback} = shift if @_;
445 1516         9477 $d;
446             }
447              
448             =item $args = $request->args([$new_args])
449              
450             Internal accessor for getting or setting the CGI arguments that will
451             be passed to the DAS server. The arguments are a hashref in which the
452             keys and values correspond to the CGI parameters. Multivalued CGI
453             parameters are represented as array refs.
454              
455             =cut
456              
457             # get/set the request arguments
458             sub args {
459 12     12 1 22 my $self = shift;
460 12         29 my $d = $self->{args};
461 12 50       37 $self->{args} = shift if @_;
462 12         78 $d;
463             }
464              
465             =item $method = $request->method
466              
467             This method can be overridden by subclasses to force the
468             L<Bio::Das::HTTP::Fetch> object to use a particular HTTP request
469             method. Possible values that this method can return are "AUTO", "GET"
470             or "POST." The base class returns a value of "AUTO," allowing the
471             L<Bio::Das::HTTP::Fetch> object to choose the most appropriate request
472             method.
473              
474             =cut
475              
476             # return the method - currently "auto"
477             sub method {
478 6     6 1 14 my $self = shift;
479 6         44 return 'AUTO';
480             }
481              
482             # == Parser stuff ==
483              
484             =item $request->headers($das_header_data)
485              
486             The headers() method is called internally to parse the HTTP headers
487             returned by the DAS server. The data is a hashref in which the keys
488             and values correspond to the HTTP headers and their values.
489              
490             =cut
491              
492             # handle the headers
493             sub headers {
494 6     6 1 13 my $self = shift;
495 6         21 my $hashref = shift;
496              
497             # check the DAS header
498 6 50       26 my $protocol = $hashref->{'X-Das-Version'} or
499             return $self->error('502 No X-Das-Version header');
500              
501 6 50       59 my ($version) = $protocol =~ m!(?:DAS/)?([\d.]+)! or
502             return $self->error('503 Invalid X-Das-Version header');
503              
504 6 50       35 $version >= DASVERSION or
505 0         0 return $self->error("504 DAS server is too old. Got $version; require at least ${\DASVERSION}");
506              
507             # check the DAS status
508 6 50       21 my $status = $hashref->{'X-Das-Status'} or
509             return $self->error('505 No X-Das-Status header');
510              
511 6 50       30 $status =~ /200/ or
512             return $self->error("$status $DAS_error_codes{$status}");
513              
514 6 50 33     26 $self->compressed(1) if exists $hashref->{'Content-Encoding'} &&
515             $hashref->{'Content-Encoding'} =~ /gzip/;
516              
517 6         26 1; # we passed the tests, so we continue to parse
518             }
519              
520             =item $request->start_body()
521              
522             This internal method is called by L<Bio::Das::HTTP::Fetch> upon first
523             encountering the DAS document body data. The method calls
524             create_parser() to create the appropriately-initialized HTML::Parser
525             object and stores it internally using the xml_parser() accessor.
526              
527             =cut
528              
529             # called to do initialization after receiving the header
530             # but before processing any body data
531             sub start_body {
532 6     6 1 9 my $self = shift;
533 6         46 $self->xml_parser($self->create_parser);
534 6         28 $self->xml_parser->xml_mode(1);
535 6         16 return $self->xml_parser;
536             }
537              
538             =item $request->body($data)
539              
540             This internal method is called by L<Bio::Das::HTTP::Fetch> to process
541             each chunk of DAS document data. The data is processed incrementally
542             in multiple steps until the end of document is reached.
543              
544             =cut
545              
546             # called to process body data
547             sub body {
548 114     114 1 186 my $self = shift;
549 114         267 my $data = shift;
550 114 50       316 my $parser = $self->xml_parser or return;
551 114         132 my $status;
552 114 50       321 if ($self->compressed) {
553 0         0 ($data,$status) = $self->inflate($data);
554 0 0       0 return unless $status;
555             }
556 114         1606 return $parser->parse($data);
557             }
558              
559             =item $request->finish_body()
560              
561             This internal method is called by L<Bio::Das::HTTP::Fetch> when the
562             end of document is encountered.
563              
564             =cut
565              
566             # called to finish body data
567             sub finish_body {
568 6     6 1 12 my $self = shift;
569 6         30 $self->cleanup();
570 6 50       18 my $parser = $self->xml_parser or return;
571 6         37 my $result = $parser->eof;
572 6         39 $self->success(1);
573 6         18 1;
574             }
575              
576             =item ($inflated_data,$status) = $request->inflate($data)
577              
578             This internal method is called when processing compressed data. It
579             returns a two-element list consisting of the inflated data and a
580             true/false status code. A false status code means an error was
581             encountered during inflation, and ordinarily causes the parsing to
582             terminate.
583              
584             =cut
585              
586             # == inflation stuff ==
587             sub inflate {
588 0     0 1 0 my $self = shift;
589 0         0 my $compressed_data = shift;
590              
591             # the complication here is that we might be called on a portion of the
592             # data stream that contains only a partial header. This is unlikely, but
593             # I'll be paranoid.
594 0 0       0 if (!$self->{p_i}) { # haven't created the inflator yet
595 0         0 $self->{p_gzip_header} .= $compressed_data;
596 0         0 my $cd = $self->{p_gzip_header};
597 0 0       0 return ('',1) if length $cd < 10;
598              
599             # process header
600 0         0 my ($gzip_magic,$gzip_method,$comment,$time,undef,$os_magic)
601             = unpack("nccVcc",substr($cd,0,10));
602              
603 0 0       0 return $self->error("506 Data decompression failure (not a gzip stream)")
604             unless $gzip_magic == GZIP_MAGIC;
605 0 0       0 return $self->error("506 Data decompression failure (unknown compression method)")
606             unless $gzip_method == Z_DEFLATED;
607              
608 0         0 substr($cd,0,10) = ''; # truncate the rest
609              
610             # handle embedded comments that proceed deflated stream
611             # note that we do not correctly buffer here, but assume
612             # that we've got it all. We don't bother doing this right,
613             # because the filename field is not usually present in
614             # the on-the-fly streaming done by HTTP servers.
615 0 0 0     0 if ($comment == 8 or $comment == 10) {
616 0         0 my ($fname) = unpack("Z*",$cd);
617 0         0 substr($cd,0,(length $fname)+1) = '';
618             }
619              
620 0         0 $compressed_data = $cd;
621 0         0 delete $self->{p_gzip_header};
622              
623 0 0       0 $self->{p_i} = inflateInit(-WindowBits => -MAX_WBITS() ) or return;
624             }
625              
626 0         0 my ($out,$status) = $self->{p_i}->inflate($compressed_data);
627 0 0 0     0 return $self->error("506 Data decompression failure (inflation failed, errcode = $status)")
628             unless $status == Z_OK or $status == Z_STREAM_END;
629              
630 0         0 return ($out,1);
631             }
632              
633             =item $trimmed_string = $request->trim($untrimmed_string)
634              
635             This internal method strips leading and trailing whitespace from a
636             string.
637              
638             =cut
639              
640             # utilities
641             sub trim {
642 8126     8126 1 8817 my $self = shift;
643 8126         14572 my $string = shift;
644 8126         23955 $string =~ s/^\s+//;
645 8126         12077 $string =~ s/\s+$//;
646 8126         48422 $string;
647             }
648              
649             =back
650              
651             =head2 The Parsing Process
652              
653             This module and its subclasses use an interesting object-oriented way
654             of parsing XML documents that is flexible without imposing a large
655             performance penalty.
656              
657             When a tag start or tag stop is encountered, the tag and its
658             attributes are passed to the tag_starts() and tag_stops() methods
659             respectively. These methods both look for a defined method called
660             t_TAGNAME (where TAGNAME is replaced by the actual name of the tag).
661             If the method exists it is invoked, otherwise the tag and attribute
662             data are passed to the do_tag() method, which by default simply
663             ignores the tag.
664              
665             A Bio::Das::Request subclass that wishes to process the
666             E<lt>FOOBARE<gt> tag, can therefore define a method called t_FOOBAR
667             which takes two arguments, the request object and the tag attribute
668             hashref. The method can distinguish between E<lt>FOOBARE<gt> and
669             E<lt>/FOOBARE<gt> by looking at the attribute argument, which will be
670             defined for the start tag and undef for the end tag. Here is a simple
671             example:
672              
673             sub t_FOOBAR {
674             my $self = shift;
675             my $attributes = shift;
676             if ($attributes) {
677             print "FOOBAR is starting with the attributes ",join(' ',%$attributes),"\n";
678             } else {
679             print "FOOBAR is ending\n";
680             }
681             }
682              
683             The L<Bio::Das::Request::Dsn> subclass is a good example of a simple
684             parser that uses t_TAGNAME methods exclusively.
685             L<Bio::Das::Request::Stylesheet> is an example of a parser that also
686             overrides do_tag() in order to process unanticipated tags.
687              
688             =head1 AUTHOR
689              
690             Lincoln Stein <lstein@cshl.org>.
691              
692             Copyright (c) 2001 Cold Spring Harbor Laboratory
693              
694             This library is free software; you can redistribute it and/or modify
695             it under the same terms as Perl itself. See DISCLAIMER.txt for
696             disclaimers of warranty.
697              
698             =head1 SEE ALSO
699              
700             L<Bio::Das::Request>, L<Bio::Das::HTTP::Fetch>,
701             L<Bio::Das::Segment>, L<Bio::Das::Type>, L<Bio::Das::Stylesheet>,
702             L<Bio::Das::Source>, L<Bio::RangeI>
703              
704             =cut
705              
706             1;