File Coverage

Bio/DB/SwissProt.pm
Criterion Covered Total %
statement 9 109 8.2
branch 0 44 0.0
condition 0 25 0.0
subroutine 3 14 21.4
pod 10 10 100.0
total 22 202 10.8


line stmt bran cond sub pod time code
1             #
2             #
3             # BioPerl module for Bio::DB::SwissProt
4             #
5             # Please direct questions and support issues to
6             #
7             # Cared for by Jason Stajich
8             #
9             # Copyright Jason Stajich
10             #
11             # You may distribute this module under the same terms as perl itself
12              
13             # POD documentation - main docs before the code
14             # Reworked to use Bio::DB::WebDBSeqI 2000-12-11
15              
16             =head1 NAME
17              
18             Bio::DB::SwissProt - Database object interface to SwissProt retrieval
19              
20             =head1 SYNOPSIS
21              
22             use Bio::DB::SwissProt;
23              
24             $sp = Bio::DB::SwissProt->new();
25              
26             $seq = $sp->get_Seq_by_id('KPY1_ECOLI'); # SwissProt ID
27             # <4-letter-identifier>_
28             # or ...
29             $seq = $sp->get_Seq_by_acc('P43780'); # SwissProt AC
30             # [OPQ]xxxxx
31              
32              
33             # In fact in this implementation
34             # these methods call the same webscript so you can use
35             # then interchangeably
36              
37             # choose a different server to query
38             $sp = Bio::DB::SwissProt->new('-servertype' => 'expasy',
39             '-hostlocation' => 'us');
40              
41             $seq = $sp->get_Seq_by_id('BOLA_HAEIN'); # SwissProtID
42              
43             =head1 DESCRIPTION
44              
45             SwissProt is a curated database of proteins managed by the Swiss
46             Bioinformatics Institute. Additional tools for
47             parsing and manipulating swissprot files can be found at
48             ftp://ftp.ebi.ac.uk/pub/software/swissprot/Swissknife/.
49              
50             Allows the dynamic retrieval of Sequence objects (Bio::Seq) from the
51             SwissProt database via an Expasy retrieval.
52              
53             In order to make changes transparent we have host type (currently only
54             expasy) and location (default to Switzerland) separated out. This
55             allows the user to pick the closest Expasy mirror for running their
56             queries.
57              
58              
59             =head1 FEEDBACK
60              
61             =head2 Mailing Lists
62              
63             User feedback is an integral part of the evolution of this and other
64             Bioperl modules. Send your comments and suggestions preferably to one
65             of the Bioperl mailing lists. Your participation is much appreciated.
66              
67              
68             bioperl-l@bioperl.org - General discussion
69             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
70              
71             =head2 Support
72              
73             Please direct usage questions or support issues to the mailing list:
74              
75             I
76              
77             rather than to the module maintainer directly. Many experienced and
78             reponsive experts will be able look at the problem and quickly
79             address it. Please include a thorough description of the problem
80             with code and data examples if at all possible.
81              
82             =head2 Reporting Bugs
83              
84             Report bugs to the Bioperl bug tracking system to help us keep track
85             the bugs and their resolution. Bug reports can be submitted via the
86             web:
87              
88             https://github.com/bioperl/bioperl-live/issues
89              
90             =head1 AUTHOR - Jason Stajich
91              
92             Email Jason Stajich Ejason@bioperl.org E
93              
94             Thanks go to Alexandre Gattiker Egattiker@isb-sib.chE of Swiss
95             Institute of Bioinformatics for helping point us in the direction of
96             the correct expasy scripts and for swissknife references.
97              
98             Also thanks to Heikki Lehvaslaiho Eheikki-at-bioperl-dot-orgE
99             for help with adding EBI swall server.
100              
101             =head1 APPENDIX
102              
103             The rest of the documentation details each of the object
104             methods. Internal methods are usually preceded with a _
105              
106             =cut
107              
108             # Let the code begin...
109              
110             package Bio::DB::SwissProt;
111 1     1   6 use strict;
  1         2  
  1         26  
112              
113 1     1   5 use HTTP::Request::Common;
  1         2  
  1         58  
114             our $MODVERSION = '0.8.1';
115              
116 1     1   5 use base qw(Bio::DB::WebDBSeqI);
  1         2  
  1         1262  
117              
118             # global vars
119             our $DEFAULTSERVERTYPE = 'ebi';
120             our $DEFAULTFORMAT = 'swissprot';
121             # our $DEFAULTIDTRACKER = 'http://www.expasy.ch';
122              
123             # you can add your own here theoretically.
124             our %HOSTS = (
125             'expasy' => {
126             'default' => 'us',
127             'baseurl' => 'http://%s/cgi-bin/sprot-retrieve-list.pl',
128             'hosts' =>
129             {
130             'switzerland' => 'ch.expasy.org',
131             'canada' => 'ca.expasy.org',
132             'china' => 'cn.expasy.org',
133             'taiwan' => 'tw.expasy.org',
134             'australia' => 'au.expasy.org',
135             'korea' => 'kr.expasy.org',
136             'us' => 'us.expasy.org',
137             },
138             # ick, CGI variables
139             'jointype' => ' ',
140             'idvar' => 'list',
141             'basevars' => [ ],
142             },
143             'ebi' => {
144             'default' => 'uk',
145             'baseurl' => 'http://%s/Tools/dbfetch/dbfetch',
146             'hosts' => {
147             'uk' => 'www.ebi.ac.uk',
148             },
149             'jointype' => ',',
150             'idvar' => 'id',
151             'basevars' => [ 'db' => 'UniProtKB',
152             'style' => 'raw' ],
153             }
154             );
155              
156             our %ID_MAPPING_DATABASES = map {$_ => 1} qw(
157             ACC+ID ACC ID UPARC NF50 NF90 NF100 EMBL_ID EMBL PIR UNIGENE_ID P_ENTREZGENEID
158             P_GI P_IPI P_REFSEQ_AC PDB_ID DISPROT_ID HSSP_ID DIP_ID MEROPS_ID PEROXIBASE_ID
159             PPTASEDB_ID REBASE_ID TCDB_ID 2DBASE_ECOLI_ID AARHUS_GHENT_2DPAGE_ID
160             ANU_2DPAGE_ID DOSAC_COBS_2DPAGE_ID ECO2DBASE_ID WORLD_2DPAGE_ID ENSEMBL_ID
161             ENSEMBL_PRO_ID ENSEMBL_TRS_ID P_ENTREZGENEID GENOMEREVIEWS_ID KEGG_ID TIGR_ID
162             UCSC_ID VECTORBASE_ID AGD_ID ARACHNOSERVER_ID BURULIST_ID CGD CYGD_ID
163             DICTYBASE_ID ECHOBASE_ID ECOGENE_ID EUHCVDB_ID FLYBASE_ID GENECARDS_ID
164             GENEDB_SPOMBE_ID GENEFARM_ID H_INVDB_ID HGNC_ID HPA_ID LEGIOLIST_ID LEPROMA_ID
165             LISTILIST_ID MAIZEGDB_ID MIM_ID MGI_ID MYPULIST_ID NMPDR ORPHANET_ID PHARMGKB_ID
166             PHOTOLIST_ID PSEUDOCAP_ID RGD_ID SAGALIST_ID SGD_ID SUBTILIST_ID TAIR_ID
167             TUBERCULIST_ID WORMBASE_ID WORMPEP_ID XENBASE_ID ZFIN_ID EGGNOG_ID OMA_ID
168             ORTHODB_ID BIOCYC_ID REACTOME_ID CLEANEX_ID GERMONLINE_ID DRUGBANK_ID
169             NEXTBIO_ID);
170              
171             # new modules should be a little more lightweight and
172             # should use Bio::Root::Root
173             sub new {
174 0     0 1   my ($class, @args) = @_;
175 0           my $self = $class->SUPER::new(@args);
176              
177 0           my ($format, $hostlocation,$servertype) =
178             $self->_rearrange([qw(FORMAT HOSTLOCATION SERVERTYPE)],
179             @args);
180              
181 0 0 0       if( $format && $format !~ /(swiss)|(fasta)/i ) {
182 0           $self->warn("Requested Format $format is ignored because only SwissProt and Fasta formats are currently supported");
183 0           $format = $self->default_format;
184             }
185 0 0         $servertype = $DEFAULTSERVERTYPE unless $servertype;
186 0           $servertype = lc $servertype;
187 0           $self->servertype($servertype);
188 0 0         if ( $hostlocation ) {
189 0           $self->hostlocation(lc $hostlocation);
190             }
191              
192 0           $self->request_format($format); # let's always override the format, as it must be swiss or fasta
193 0           return $self;
194             }
195              
196             =head2 Routines from Bio::DB::RandomAccessI
197              
198             =cut
199              
200             =head2 get_Seq_by_id
201              
202             Title : get_Seq_by_id
203             Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
204             Function: Gets a Bio::Seq object by its name
205             Returns : a Bio::Seq object
206             Args : the id (as a string) of a sequence
207             Throws : "id does not exist" exception
208              
209             =cut
210              
211             =head2 get_Seq_by_acc
212              
213             Title : get_Seq_by_acc
214             Usage : $seq = $db->get_Seq_by_acc('X77802');
215             Function: Gets a Bio::Seq object by accession number
216             Returns : A Bio::Seq object
217             Args : accession number (as a string)
218             Throws : "acc does not exist" exception
219              
220             =cut
221              
222             =head2 get_Stream_by_id
223              
224             Title : get_Stream_by_id
225             Usage : $stream = $db->get_Stream_by_id( [$uid1, $uid2] );
226             Function: Gets a series of Seq objects by unique identifiers
227             Returns : a Bio::SeqIO stream object
228             Args : $ref : a reference to an array of unique identifiers for
229             the desired sequence entries
230              
231             =cut
232              
233             =head2 get_Stream_by_acc
234              
235             Title : get_Stream_by_acc
236             Usage : $seq = $db->get_Seq_by_acc([$acc1, $acc2]);
237             Function: Gets a series of Seq objects by accession numbers
238             Returns : a Bio::SeqIO stream object
239             Args : $ref : a reference to an array of accession numbers for
240             the desired sequence entries
241             Note : For GenBank, this just calls the same code for get_Stream_by_id()
242              
243             =cut
244              
245             =head2 get_Stream_by_batch
246              
247             Title : get_Stream_by_batch
248             Usage : $seq = $db->get_Stream_by_batch($ref);
249             Function: Retrieves Seq objects from SwissProt 'en masse', rather than one
250             at a time. This is implemented the same way as get_Stream_by_id,
251             but is provided here in keeping with access methods of NCBI
252             modules.
253             Example :
254             Returns : a Bio::SeqIO stream object
255             Args : $ref : either an array reference, a filename, or a filehandle
256             from which to get the list of unique ids/accession numbers.
257              
258             NOTE: deprecated API. Use get_Stream_by_id() instead.
259              
260             =cut
261              
262             *get_Stream_by_batch = sub {
263 0     0     my $self = shift;
264 0           $self->deprecated('get_Stream_by_batch() is deprecated; use get_Stream_by_id() instead');
265 0           $self->get_Stream_by_id(@_)
266             };
267              
268             =head2 Implemented Routines from Bio::DB::WebDBSeqI interface
269              
270             =cut
271              
272             =head2 get_request
273              
274             Title : get_request
275             Usage : my $url = $self->get_request
276             Function: returns a HTTP::Request object
277             Returns :
278             Args : %qualifiers = a hash of qualifiers (ids, format, etc)
279              
280             =cut
281              
282             sub get_request {
283 0     0 1   my ($self, @qualifiers) = @_;
284 0           my ($uids, $format) = $self->_rearrange([qw(UIDS FORMAT)],
285             @qualifiers);
286              
287 0 0         if( !defined $uids ) {
288 0           $self->throw("Must specify a value for uids to query");
289             }
290 0           my ($f,undef) = $self->request_format($format);
291              
292             my %vars = (
293 0           @{$HOSTS{$self->servertype}->{'basevars'}},
  0            
294             ( 'format' => $f )
295             );
296              
297 0           my $url = $self->location_url;
298              
299 0           my $uid;
300 0   0       my $jointype = $HOSTS{$self->servertype}->{'jointype'} || ' ';
301 0   0       my $idvar = $HOSTS{$self->servertype}->{'idvar'} || 'id';
302              
303 0 0         if( ref($uids) =~ /ARRAY/i ) {
304             # HTTP::Request automagically converts the ' ' to %20
305 0           $uid = join($jointype, @$uids);
306             } else {
307 0           $uid = $uids;
308             }
309 0           $vars{$idvar} = $uid;
310              
311 0           return POST $url, \%vars;
312             }
313              
314             =head2 postprocess_data
315              
316             Title : postprocess_data
317             Usage : $self->postprocess_data ( 'type' => 'string',
318             'location' => \$datastr);
319             Function: process downloaded data before loading into a Bio::SeqIO
320             Returns : void
321             Args : hash with two keys - 'type' can be 'string' or 'file'
322             - 'location' either file location or string
323             reference containing data
324              
325             =cut
326              
327             # don't need to do anything
328              
329             sub postprocess_data {
330 0     0 1   my ($self, %args) = @_;
331 0           return;
332             }
333              
334             =head2 default_format
335              
336             Title : default_format
337             Usage : my $format = $self->default_format
338             Function: Returns default sequence format for this module
339             Returns : string
340             Args : none
341              
342             =cut
343              
344             sub default_format {
345 0     0 1   return $DEFAULTFORMAT;
346             }
347              
348             =head2 Bio::DB::SwissProt specific routines
349              
350             =cut
351              
352             =head2 servertype
353              
354             Title : servertype
355             Usage : my $servertype = $self->servertype
356             $self->servertype($servertype);
357             Function: Get/Set server type
358             Returns : string
359             Args : server type string [optional]
360              
361             =cut
362              
363             sub servertype {
364 0     0 1   my ($self, $servertype) = @_;
365 0 0 0       if( defined $servertype && $servertype ne '') {
366             $self->throw("You gave an invalid server type ($servertype)".
367             " - available types are ".
368 0 0         keys %HOSTS) unless( $HOSTS{$servertype} );
369 0           $self->{'_servertype'} = $servertype;
370 0           $self->{'_hostlocation'} = $HOSTS{$servertype}->{'default'};
371              
372             # make sure format is reset properly in that different
373             # servers have different syntaxes
374 0           my ($existingformat,$seqioformat) = $self->request_format;
375 0           $self->request_format($existingformat);
376             }
377 0   0       return $self->{'_servertype'} || $DEFAULTSERVERTYPE;
378             }
379              
380              
381             =head2 hostlocation
382              
383             Title : hostlocation
384             Usage : my $location = $self->hostlocation()
385             $self->hostlocation($location)
386             Function: Set/Get Hostlocation
387             Returns : string representing hostlocation
388             Args : string specifying hostlocation [optional]
389              
390             =cut
391              
392             sub hostlocation {
393 0     0 1   my ($self, $location ) = @_;
394 0           my $servertype = $self->servertype;
395 0 0         $self->throw("Must have a valid servertype defined not $servertype")
396             unless defined $servertype;
397 0           my %hosts = %{$HOSTS{$servertype}->{'hosts'}};
  0            
398 0 0 0       if( defined $location && $location ne '' ) {
399 0           $location = lc $location;
400 0 0         if( ! $hosts{$location} ) {
401 0           $self->throw("Must specify a known host, not $location,".
402             " possible values (".
403             join(",", sort keys %hosts ). ")");
404             }
405 0           $self->{'_hostlocation'} = $location;
406             }
407 0           return $self->{'_hostlocation'};
408             }
409              
410             =head2 location_url
411              
412             Title : location
413             Usage : my $url = $self->location_url()
414             Function: Get host url
415             Returns : string representing url
416             Args : none
417              
418             =cut
419              
420             sub location_url {
421 0     0 1   my ($self) = @_;
422 0           my $servertype = $self->servertype();
423 0           my $location = $self->hostlocation();
424              
425 0 0 0       if( ! defined $location || !defined $servertype ) {
426 0           $self->throw("must have a valid hostlocation and servertype set before calling location_url");
427             }
428             return sprintf($HOSTS{$servertype}->{'baseurl'},
429 0           $HOSTS{$servertype}->{'hosts'}->{$location});
430             }
431              
432             =head2 request_format
433              
434             Title : request_format
435             Usage : my ($req_format, $ioformat) = $self->request_format;
436             $self->request_format("genbank");
437             $self->request_format("fasta");
438             Function: Get/Set sequence format retrieval. The get-form will normally
439             not be used outside of this and derived modules.
440             Returns : Array of two strings, the first representing the format for
441             retrieval, and the second specifying the corresponding SeqIO
442             format.
443             Args : $format = sequence format
444              
445             =cut
446              
447             sub request_format {
448 0     0 1   my ($self, $value) = @_;
449 0 0         if( defined $value ) {
450 0 0         if( $self->servertype =~ /expasy/ ) {
    0          
451 0 0 0       if( $value =~ /sprot/ || $value =~ /swiss/ ) {
    0          
452 0           $self->{'_format'} = [ 'sprot', 'swiss'];
453             } elsif( $value =~ /^fa/ ) {
454 0           $self->{'_format'} = [ 'fasta', 'fasta'];
455             } else {
456 0           $self->warn("Unrecognized format $value requested");
457 0           $self->{'_format'} = [ 'fasta', 'fasta'];
458             }
459             } elsif( $self->servertype =~ /ebi/ ) {
460 0 0 0       if( $value =~ /sprot/ || $value =~ /swiss/ ) {
    0          
461 0           $self->{'_format'} = [ 'swissprot', 'swiss' ];
462             } elsif( $value =~ /^fa/ ) {
463 0           $self->{'_format'} = [ 'fasta', 'fasta'];
464             } else {
465 0           $self->warn("Unrecognized format $value requested");
466 0           $self->{'_format'} = [ 'swissprot', 'swiss'];
467             }
468             }
469             }
470 0           return @{$self->{'_format'}};
  0            
471             }
472              
473             =head2 idtracker
474              
475             Title : idtracker
476             Usage : my ($newid) = $self->idtracker($oldid);
477             Function: Retrieve new ID using old ID.
478             Returns : single ID if one is found
479             Args : ID to look for
480              
481             =cut
482              
483             sub idtracker {
484 0     0 1   my ($self, $id) = @_;
485 0           $self->deprecated(
486             -message => 'The SwissProt IDTracker service is no longer available, '.
487             'use id_mapper() instead',
488             -warn_version => 1.006, # warn if $VERSION is >= this version
489             -throw_version => 1.007 # throw if $VERSION is >= this version
490             );
491             }
492              
493             =head2 id_mapper
494              
495             Title : id_tracker
496             Usage : my $map = $self->id_mapper( -from => '',
497             -to => '',
498             -ids => \@ids);
499             Function: Retrieve new ID using old ID.
500             Returns : hash reference of successfully mapped IDs
501             Args : -from : database mapping from
502             -to : database mapped to
503             -ids : a single ID or array ref of IDs to map
504             Note : For a list of valid database IDs, see:
505             http://www.uniprot.org/faq/28#id_mapping_examples
506              
507             =cut
508              
509             sub id_mapper {
510 0     0 1   my $self = shift;
511 0           my ($from, $to, $ids) = $self->_rearrange([qw(FROM TO IDS)], @_);
512 0           for ($from, $to) {
513 0 0         $self->throw("$_ is not a recognized database") if !exists $ID_MAPPING_DATABASES{$_};
514             }
515 0 0         my @ids = ref $ids ? @$ids : $ids;
516 0           my $params = {
517             from => $from,
518             to => $to,
519             format => 'tab',
520             query => join(' ',@ids)
521             };
522 0           my $ua = $self->ua;
523 0           push @{ $ua->requests_redirectable }, 'POST';
  0            
524 0           my $response = $ua->post("http://www.uniprot.org/mapping/", $params);
525 0           while (my $wait = $response->header('Retry-After')) {
526 0           $self->debug("Waiting...\n");
527 0           $self->_sleep;
528 0           $response = $ua->get($response->base);
529             }
530              
531 0           my %map;
532 0 0         if ($response->is_success) {
533 0           for my $line (split("\n", $response->content)) {
534 0           my ($id_from, $id_to) = split(/\s+/, $line, 2);
535 0 0         next if $id_from eq 'From';
536 0           push @{$map{$id_from}}, $id_to;
  0            
537             }
538             } else {
539 0           $self->throw("Error: ".$response->status_line."\n");
540             }
541 0           \%map;
542             }
543              
544             1;
545              
546             __END__