File Coverage

blib/lib/Bio/DB/ESoap/WSDL.pm
Criterion Covered Total %
statement 7 9 77.7
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 10 12 83.3


line stmt bran cond sub pod time code
1             # $Id$
2             #
3             # BioPerl module for Bio::DB::ESoap::WSDL
4             #
5             # Please direct questions and support issues to
6             #
7             # Cared for by Mark A. Jensen
8             #
9             # Copyright Mark A. Jensen
10             #
11             # You may distribute this module under the same terms as perl itself
12              
13             # POD documentation - main docs before the code
14              
15             =head1 NAME
16              
17             Bio::DB::ESoap::WSDL - WSDL parsing for Entrez SOAP EUtilities
18              
19             =head1 SYNOPSIS
20              
21             Used by L
22            
23             # url
24             $wsdl = Bio::DB::ESoap::WSDL->new(
25             -url => "http://www.ncbi.nlm.nih.gov/entrez/eutils/soap/v2.0/eutils.wsdl"
26             );
27             # local copy
28             $wsdl = Bio::DB::ESoap::WSDL->new(
29             -wsdl => "local/eutils.wsdl"
30             );
31              
32             %opns = %{ $wsdl->operations };
33            
34              
35             =head1 DESCRIPTION
36              
37             This module is a lightweight parser and container for WSDL XML files
38             associated with the NCBI EUtilities SOAP server. XML facilities are
39             provided by L.
40              
41             The following accessors provide names and structures useful for
42             creating SOAP messages using L (e.g.):
43              
44             service() : the URL of the SOAP service
45             operations() : hashref of the form {.., $operation_name => $soapAction, ...}
46             request_parameters($operation) :
47             request field names and namelists as an array of hashes
48             result_parameters($operation) :
49             result field names and namelists as an array of hashes
50              
51             The following accessors provide L objects pointing at
52             key locations in the WSDL:
53              
54             root : the root of the WSDL docment
55             _types_elt : the element
56             _portType_elt : the element
57             _binding_elt : the element
58             _service_elt : the element
59             _message_elts : an array of all top-level elements
60             _operation_elts : an array of all elements contained in
61            
62             Parsing occurs lazily (on first read, not on construction); all
63             information is cached. To clear the cache and force re-parsing, run
64              
65             $wsdl->clear_cache;
66              
67             The globals C<$NCBI_BASEURL>, C<$NCBI_ADAPTOR>, and C<%WSDL> are exported.
68              
69             $NCBI_ADAPTOR : the soap service cgi
70            
71             To construct a URL for a WSDL:
72              
73             $wsdl_eutils = $NCBI_BASEURL.$WSDL{'eutils'}
74             $wsdl_efetch_omim = $NCBI_BASEURL.$WSDL{'f_omim'}
75             # etc.
76              
77             =head1 FEEDBACK
78              
79             =head2 Mailing Lists
80              
81             User feedback is an integral part of the evolution of this and other
82             Bioperl modules. Send your comments and suggestions preferably to
83             the Bioperl mailing list. Your participation is much appreciated.
84              
85             bioperl-l@bioperl.org - General discussion
86             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
87              
88             =head2 Support
89              
90             Please direct usage questions or support issues to the mailing list:
91              
92             L
93              
94             rather than to the module maintainer directly. Many experienced and
95             reponsive experts will be able look at the problem and quickly
96             address it. Please include a thorough description of the problem
97             with code and data examples if at all possible.
98              
99             =head2 Reporting Bugs
100              
101             Report bugs to the Bioperl bug tracking system to help us keep track
102             of the bugs and their resolution. Bug reports can be submitted via
103             the web:
104              
105             http://redmine.open-bio.org/projects/bioperl/
106              
107             =head1 AUTHOR - Mark A. Jensen
108              
109             Email maj -at- fortinbras -dot- us
110              
111             =head1 APPENDIX
112              
113             The rest of the documentation details each of the object methods.
114             Internal methods are usually preceded with a _
115              
116             =cut
117              
118             package Bio::DB::ESoap::WSDL;
119 2     2   8 use strict;
  2         2  
  2         46  
120              
121 2     2   7 use Bio::Root::Root;
  2         3  
  2         26  
122 2     2   3527 use XML::Twig;
  0            
  0            
123             use Bio::WebAgent;
124             use File::Temp;
125              
126             use base qw(Bio::Root::Root Exporter);
127              
128             our @EXPORT = qw( $NCBI_BASEURL $NCBI_ADAPTOR %WSDL );
129              
130             our $NCBI_BASEURL = "http://www.ncbi.nlm.nih.gov/entrez/eutils/soap/v2.0/";
131             our $NCBI_ADAPTOR = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/soap/v2.0/soap_adapter_2_0.cgi";
132              
133             our %WSDL = (
134             'eutils' => 'eutils.wsdl',
135             'f_pubmed' => 'efetch_pubmed.wsdl',
136             'f_pmc' => 'efetch_pmc.wsdl',
137             'f_nlmc' => 'efetch_nlmc.wsdl',
138             'f_journals' => 'efetch_journals.wsdl',
139             'f_omim' => 'efetch_omim.wsdl',
140             'f_taxon' => 'efetch_taxon.wsdl',
141             'f_snp' => 'efetch_snp.wsdl',
142             'f_gene' => 'efetch_gene.wsdl',
143             'f_seq' => 'efetch_seq.wsdl'
144             );
145              
146             =head2 new
147              
148             Title : new
149             Usage : my $obj = new Bio::DB::ESoap::WSDL();
150             Function: Builds a new Bio::DB::ESoap::WSDL object
151             Returns : an instance of Bio::DB::ESoap::WSDL
152             Args : named args:
153             -URL => $url_of_desired_wsdl -OR-
154             -WSDL => $filename_of_local_wsdl_copy
155             ( -WSDL will take precedence if both specified )
156              
157             =cut
158              
159             sub new {
160             my ($class,@args) = @_;
161             my $self = $class->SUPER::new(@args);
162             my ($url, $wsdl) = $self->_rearrange( [qw( URL WSDL )], @args );
163             my (%sections, %cache);
164             my $doc = 'wsdl:definitions';
165             $sections{'_message_elts'} = [];
166             $sections{'_operation_elts'} = [];
167             $self->_sections(\%sections);
168             $self->_cache(\%cache);
169             $self->_twig(
170             XML::Twig->new(
171             twig_handlers => {
172             $doc => sub { $self->root($_) },
173             "$doc/binding" => sub { $self->_sections->{'_binding_elt'} = $_ },
174             "$doc/binding/operation" => sub { push @{$self->_sections->{'_operation_elts'}},$_ },
175             "$doc/message" => sub { push @{$self->_sections->{'_message_elts'}}, $_ },
176             "$doc/portType" => sub { $self->_sections->{'_portType_elt'} = $_ },
177             "$doc/service" => sub { $self->_sections->{'_service_elt'} = $_ },
178             "$doc/types" => sub { $self->_sections->{'_types_elt'} = $_ },
179             }
180             )
181             );
182             if ($url || $wsdl ) {
183             $self->url($url);
184             $self->wsdl($wsdl);
185             $self->_parse;
186             }
187             return $self;
188             }
189              
190             =head1 Getters
191              
192             =head2 request_parameters()
193              
194             Title : request_parameters
195             Usage : @params = $wsdl->request_parameters($operation_name)
196             Function: get array of request (input) fields required by
197             specified operation, according to the WSDL
198             Returns : hash of arrays of hashes...
199             Args : scalar string (operation or action name)
200              
201             =cut
202              
203             sub request_parameters {
204             my $self = shift;
205             my ($operation) = @_;
206             my $is_action;
207             $self->throw("Operation name must be specified") unless defined $operation;
208             my $opn_hash = $self->operations;
209             unless ( grep /^$operation$/, keys %$opn_hash ) {
210             $is_action = grep /^$operation$/, values %$opn_hash;
211             $self->throw("Operation name '$operation' is not recognized")
212             unless ($is_action);
213             }
214            
215             #check the cache here....
216             return $self->_cache("request_params_$operation") if
217             $self->_cache("request_params_$operation");
218              
219             # find the input message type in the portType elt
220             if ($is_action) {
221             my @a = grep {$$opn_hash{$_} eq $operation} keys %$opn_hash;
222             # note this takes the first match
223             $operation = $a[0];
224             $self->throw("Whaaa??") unless defined $operation;
225             }
226             #check the cache once more after translation....
227             return $self->_cache("request_params_$operation") if
228             $self->_cache("request_params_$operation");
229              
230             my $bookmarks = $self->_operation_bookmarks($operation);
231              
232             my $imsg_elt = $bookmarks->{'i_msg_elt'};
233             my $opn_schema = $bookmarks->{'schema'};
234             my $ret = { $imsg_elt->att('name') => [] };
235            
236             # do a quick recursion:
237             _get_types((values %$ret)[0], $imsg_elt, $opn_schema);
238             return $self->_cache("request_params_$operation", $ret);
239              
240             1;
241             }
242              
243             =head2 result_parameters()
244              
245             Title : result_parameters
246             Usage : $result_hash = $wsdl->result_parameters
247             Function: retrieve a hash structure describing the
248             result of running the specified operation
249             according to the WSDL
250             Returns : hash of arrays of hashes...
251             Args : operation (scalar string)
252              
253             =cut
254              
255             sub result_parameters {
256             my $self = shift;
257             my ($operation) = @_;
258             my $is_action;
259             $self->throw("Operation name must be specified") unless defined $operation;
260             my $opn_hash = $self->operations;
261             unless ( grep /^$operation$/, keys %$opn_hash ) {
262             $is_action = grep /^$operation$/, values %$opn_hash;
263             $self->throw("Operation name '$operation' is not recognized")
264             unless ($is_action);
265             }
266            
267             #check the cache here....
268             return $self->_cache("result_params_$operation") if
269             $self->_cache("result_params_$operation");
270              
271             # find the input message type in the portType elt
272             if ($is_action) {
273             my @a = grep {$$opn_hash{$_} eq $operation} keys %$opn_hash;
274             # note this takes the first match
275             $operation = $a[0];
276             $self->throw("Whaaa??") unless defined $operation;
277             }
278             #check the cache once more after translation....
279             return $self->_cache("result_params_$operation") if
280             $self->_cache("result_params_$operation");
281              
282             # do work
283             my $bookmarks = $self->_operation_bookmarks($operation);
284              
285             # eutilities results seem to be a mixture of xs:string element
286             # and complex types which are just xs:seqs of xs:string elements
287             #
288             # cast these as a hash of hashes...
289              
290             my $omsg_elt = $bookmarks->{'o_msg_elt'};
291             my $opn_schema = $bookmarks->{'schema'};
292             my $ret = { $omsg_elt->att('name') => [] };
293            
294             # do a quick recursion:
295             _get_types((values %$ret)[0], $omsg_elt, $opn_schema);
296             return $self->_cache("result_params_$operation", $ret);
297             }
298              
299             sub response_parameters { shift->result_parameters( @_ ) }
300              
301             =head2 operations()
302              
303             Title : operations
304             Usage : @opns = $wsdl->operations;
305             Function: get a hashref with elts ( $operation_name => $soapAction )
306             for all operations defined by this WSDL
307             Returns : array of scalar strings
308             Args : none
309              
310             =cut
311              
312             sub operations {
313             my $self = shift;
314             return $self->_cache('operations') if $self->_cache('operations');
315             my %opns;
316             foreach (@{$self->_parse->_operation_elts}) {
317             $opns{$_->att('name')} =
318             ($_->descendants('soap:operation'))[0]->att('soapAction');
319             }
320             return $self->_cache('operations', \%opns);
321             }
322              
323             =head2 service()
324              
325             Title : service
326             Usage : $wsdl->service
327             Function: gets the SOAP service url associated with this WSDL
328             Returns : scalar string
329             Args : none
330              
331             =cut
332              
333             sub service {
334             my $self = shift;
335             return $self->_cache('service') ||
336             $self->_cache('service', ($self->_parse->_service_elt->descendants('soap:address'))[0]->att('location'));
337             }
338              
339             =head2 db()
340              
341             Title : db
342             Usage :
343             Function: If this is an efetch WSDL, returns the db name
344             associated with it
345             Returns : scalar string or undef
346             Args : none
347              
348             =cut
349              
350             sub db {
351             my $self = shift;
352             $self->root->namespace('nsef') =~ /efetch_(.*?)$/;
353             return $1;
354             }
355              
356             =head1 Internals
357              
358             =head2 _operation_bookmarks()
359              
360             Title : _operation_bookmarks
361             Usage :
362             Function: find useful WSDL elements associated with the specified
363             operation; return a hashref of the form
364             { $key => $XML_Twig_Elt_obj, }
365             Returns : hashref with keys:
366             portType namespace schema
367             i_msg_type i_msg_elt
368             o_msg_type o_msg_elt
369             Args : operation name (scalar string)
370             Note : will import schema if necessary
371              
372             =cut
373              
374             sub _operation_bookmarks {
375             my $self = shift;
376             my $operation = shift;
377             # check cache
378             return $self->_cache("bookmarks_$operation") if
379             $self->_cache("bookmarks_$operation");
380             # do work
381             my %bookmarks;
382             my $pT_opn = $self->_portType_elt->first_child(
383             qq/ operation[\@name="$operation"] /
384             );
385             my $imsg_type = $pT_opn->first_child('input')->att('message');
386             my $omsg_type = $pT_opn->first_child('output')->att('message');
387              
388             # now lookup the schema element name from among the message elts
389             my ($imsg_elt, $omsg_elt);
390             foreach ( @{$self->_message_elts} ) {
391             my $msg_name = $_->att('name');
392             if ( $imsg_type =~ qr/$msg_name/ ) {
393             $imsg_elt = $_->first_child('part[@element=~/[Rr]equest/]')->att('element');
394             }
395             if ( $omsg_type =~ qr/$msg_name/) {
396             $omsg_elt = $_->first_child('part[@element=~/[Rr]esult/]')->att('element');
397             }
398             last if ($imsg_elt && $omsg_elt);
399             }
400             $self->throw("Can't find request schema element corresponding to '$operation'") unless $imsg_elt;
401             $self->throw("Can't find result schema element corresponding to '$operation'") unless $omsg_elt;
402              
403             # $imsg_elt has a namespace prefix, to lead us to the correct schema
404             # as defined in the wsdl element. Get that schema
405             $imsg_elt =~ /(.*?):/;
406             my $opn_ns = $self->root->namespace($1);
407             my $opn_schema = $self->_types_elt->first_child("xs:schema[\@targetNamespace='$opn_ns']");
408             $opn_schema ||= $self->_types_elt->first_child("xs:schema"); # only one
409             $self->throw("Can't find types schema corresponding to '$operation'") unless defined $opn_schema;
410              
411             # need to import the schema? do it here.
412             if ( my $import_elt = $opn_schema->first_child("xs:import") ) {
413             my $import_url = $NCBI_BASEURL.$import_elt->att('schemaLocation');
414             my $imported = XML::Twig->new();
415             # better error checking here?
416             eval {
417             $imported->parse(Bio::WebAgent->new()->get($import_url)->content);
418             };
419             $self->throw("Schema import failed (tried url '$import_url') : $@") if $@;
420             my $imported_schema = $imported->root;
421             # get included schemata
422             my @included = $imported_schema->children("xs:include");
423             foreach (@included) {
424              
425             my $url = $NCBI_BASEURL.$_->att('schemaLocation');
426             my $incl = XML::Twig->new();
427             eval {
428             $incl->parse( Bio::WebAgent->new()->get($url)->content );
429             };
430             $self->throw("Schema include failed (tried url '$url') : $@") if $@;
431             # cut-n-paste
432             my @incl = $incl->root->children;
433             $_->cut;
434             foreach my $child (@incl) {
435             $child->cut;
436             $child->paste( last_child => $_->former_parent );
437             }
438             }
439            
440             # cut-n-paste
441             $opn_schema->cut;
442             $imported_schema->cut;
443             $imported_schema->paste( first_child => $opn_schema->former_parent );
444             $opn_schema = $imported_schema;
445             }
446            
447              
448             # find the definition of $imsg_elt in $opn_schema
449             $imsg_elt =~ s/.*?://;
450             $imsg_elt = $opn_schema->first_child("xs:element[\@name='$imsg_elt']");
451             $self->throw("Can't find request element definition in schema corresponding to '$operation'") unless defined $imsg_elt;
452             $omsg_elt =~ s/.*?://;
453             $omsg_elt = $opn_schema->first_child("xs:element[\@name='$omsg_elt']");
454             $self->throw("Can't find result element definition in schema corresponding to '$operation'") unless defined $omsg_elt;
455              
456             @bookmarks{qw(portType i_msg_type o_msg_type
457             namespace schema i_msg_elt o_msg_elt ) } =
458             ($pT_opn, $imsg_type, $omsg_type, $opn_ns, $opn_schema,
459             $imsg_elt, $omsg_elt);
460             return $self->_cache("bookmarks_$operation", \%bookmarks);
461            
462             }
463              
464             =head2 _parse()
465              
466             Title : _parse
467             Usage : $wsdl->_parse
468             Function: parse the wsdl at url and create accessors for
469             section twig elts
470             Returns : self
471             Args :
472              
473             =cut
474              
475             sub _parse {
476             my $self = shift;
477             my @args = @_;
478             return $self if $self->_parsed; # already done
479             $self->throw("Neither URL nor WSDL set in object") unless $self->url || $self->wsdl;
480             eval {
481             if ($self->wsdl) {
482             $self->_twig->parsefile($self->wsdl);
483             }
484             else {
485             eval {
486             my $tfh = File::Temp->new(-UNLINK=>1);
487             Bio::WebAgent->new()->get($self->url, ':content_file' => $tfh->filename);
488             $tfh->close;
489             $self->_twig->parsefile($tfh->filename);
490             $self->wsdl($tfh->filename);
491             };
492             $self->throw("URL parse failed : $@") if $@;
493             }
494             };
495             # $self->throw("Parser issue : $@") if $@;
496             die $@ if $@;
497             $self->_set_from_args( $self->_sections,
498             -methods => [qw(_types_elt _message_elts
499             _portType_elt _binding_elt
500             _operation_elts _service_elt)],
501             -create => 1 );
502             $self->_parsed(1);
503             return $self;
504             }
505              
506             =head2 root()
507              
508             Title : root
509             Usage : $obj->root($newval)
510             Function: holds the root Twig elt of the parsed WSDL
511             Example :
512             Returns : value of root (an XML::Twig::Elt)
513             Args : on set, new value (an XML::Twig::Elt or undef, optional)
514              
515             =cut
516              
517             sub root {
518             my $self = shift;
519            
520             return $self->{'root'} = shift if @_;
521             return $self->{'root'};
522             }
523              
524             =head2 url()
525              
526             Title : url
527             Usage : $obj->url($newval)
528             Function: get/set the WSDL url
529             Example :
530             Returns : value of url (a scalar string)
531             Args : on set, new value (a scalar or undef, optional)
532              
533             =cut
534              
535             sub url {
536             my $self = shift;
537            
538             return $self->{'url'} = shift if @_;
539             return $self->{'url'};
540             }
541              
542             =head2 wsdl()
543              
544             Title : wsdl
545             Usage : $obj->wsdl($newval)
546             Function: get/set wsdl XML filename
547             Example :
548             Returns : value of wsdl (a scalar string)
549             Args : on set, new value (a scalar string or undef, optional)
550              
551             =cut
552              
553             sub wsdl {
554             my $self = shift;
555             my $file = shift;
556             if (defined $file) {
557             $self->throw("File not found") unless (-e $file) || (ref $file eq 'File::Temp');
558             return $self->{'wsdl'} = $file;
559             }
560             return $self->{'wsdl'};
561             }
562              
563             =head2 _twig()
564              
565             Title : _twig
566             Usage : $obj->_twig($newval)
567             Function: XML::Twig object for handling the wsdl
568             Example :
569             Returns : value of _twig (a scalar)
570             Args : on set, new value (a scalar or undef, optional)
571              
572             =cut
573              
574             sub _twig {
575             my $self = shift;
576            
577             return $self->{'_twig'} = shift if @_;
578             return $self->{'_twig'};
579             }
580              
581             =head2 _sections()
582              
583             Title : _sections
584             Usage : $obj->_sections($newval)
585             Function: holds hashref of twigs corresponding to main wsdl
586             elements; filled by _parse()
587             Example :
588             Returns : value of _sections (a scalar)
589             Args : on set, new value (a scalar or undef, optional)
590              
591             =cut
592              
593             sub _sections {
594             my $self = shift;
595            
596             return $self->{'_sections'} = shift if @_;
597             return $self->{'_sections'};
598             }
599              
600             =head2 _cache()
601              
602             Title : _cache
603             Usage : $wsdl->_cache($newval)
604             Function: holds the wsdl info cache
605             Example :
606             Returns : value of _cache (a scalar)
607             Args : on set, new value (a scalar or undef, optional)
608              
609             =cut
610              
611             sub _cache {
612             my $self = shift;
613             my ($name, $value) = @_;
614             unless (@_) {
615             return $self->{'_cache'} = {};
616             }
617             if (defined $value) {
618             return $self->{'_cache'}->{$name} = $value;
619             }
620             return $self->{'_cache'}->{$name};
621             }
622              
623             sub clear_cache { shift->_cache() }
624              
625             =head2 _parsed()
626              
627             Title : _parsed
628             Usage : $obj->_parsed($newval)
629             Function: flag to indicate wsdl already parsed
630             Example :
631             Returns : value of _parsed (a scalar)
632             Args : on set, new value (a scalar or undef, optional)
633              
634             =cut
635              
636             sub _parsed {
637             my $self = shift;
638            
639             return $self->{'_parsed'} = shift if @_;
640             return $self->{'_parsed'};
641             }
642              
643             # =head2 _get_types()
644              
645             # Title : _get_types
646             # Usage : very internal
647             # Function: recursively parse through custom types
648             # Returns :
649             # Args : arrayref, XML::Twig::Elt, XML::Twig::Elt
650             # (return array, type element, schema root)
651              
652             # =cut
653              
654             sub _get_types {
655             my ($res, $elt, $sch, $visited) = @_;
656             my $is_choice;
657             $visited ||= [];
658             # assuming max 1 xs:sequence or xs:choice per element
659             my $seq = ($elt->descendants('xs:sequence'))[0];
660             $is_choice = ($seq ? '' : '|');
661             $seq ||= ($elt->descendants('xs:choice'))[0];
662             return 1 unless $seq;
663             foreach ( $seq->descendants('xs:element') ) {
664             for my $type ($_->att('type') || $_->att('ref')) {
665             !defined($type) && do {
666             Bio::Root::Root->throw("neither type nor ref attributes defined; cannot proceed");
667             last;
668             };
669             $type eq 'xs:string' && do {
670             push @$res, { $_->att('name').$is_choice => 1};
671             last;
672             };
673             do { # custom type
674             # find the type def in schema
675             $type =~ s/.*?://; # strip tns
676             if (grep /^$type$/, @$visited) { # check for circularity
677            
678             push @$res, { $_->att('name').$is_choice => "$type(reused)"}if $_->att('name');
679             last;
680             }
681             push @$visited, $type;
682             my $new_elt = $sch->first_child("xs:complexType[\@name='$type']");
683             if (defined $new_elt) {
684             my $new_res = [];
685             push @$res, { $_->att('name').$is_choice => $new_res };
686             _get_types($new_res, $new_elt, $sch, $visited);
687             }
688             else { # a 'ref', make sure it's defined
689             $new_elt = $sch->first_child("xs:element[\@name='$type']");
690             $DB::single=1 unless $new_elt;
691             Bio::Root::Root->throw("type not defined in schema; cannot proceed") unless defined $new_elt;
692             push @$res, { $new_elt->att('name').$is_choice => 1 };
693             }
694             last;
695             }
696             }
697             }
698             return 1;
699             }
700            
701             sub DESTROY {
702             my $self = shift;
703             if (ref($self->wsdl) eq 'File::Temp') {
704             unlink $self->wsdl->filename;
705             }
706             }
707             1;