File Coverage

Bio/DasI.pm
Criterion Covered Total %
statement 17 29 58.6
branch 6 6 100.0
condition 5 6 83.3
subroutine 4 16 25.0
pod 12 13 92.3
total 44 70 62.8


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::DasI
3             #
4             # Please direct questions and support issues to <bioperl-l@bioperl.org>
5             #
6             # Cared for by Lincoln Stein
7             #
8             # Copyright Lincoln Stein
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::DasI - DAS-style access to a feature database
17              
18             =head1 SYNOPSIS
19              
20             # Open up a feature database somehow...
21             $db = Bio::DasI->new(@args);
22              
23             @segments = $db->segment(-name => 'NT_29921.4',
24             -start => 1,
25             -end => 1000000);
26              
27             # segments are Bio::Das::SegmentI - compliant objects
28              
29             # fetch a list of features
30             @features = $db->features(-type=>['type1','type2','type3']);
31              
32             # invoke a callback over features
33             $db->features(-type=>['type1','type2','type3'],
34             -callback => sub { ... }
35             );
36              
37             $stream = $db->get_seq_stream(-type=>['type1','type2','type3']);
38             while (my $feature = $stream->next_seq) {
39             # each feature is a Bio::SeqFeatureI-compliant object
40             }
41              
42             # get all feature types
43             @types = $db->types;
44              
45             # count types
46             %types = $db->types(-enumerate=>1);
47              
48             @feature = $db->get_feature_by_name($class=>$name);
49             @feature = $db->get_feature_by_target($target_name);
50             @feature = $db->get_feature_by_attribute($att1=>$value1,$att2=>$value2);
51             $feature = $db->get_feature_by_id($id);
52              
53             $error = $db->error;
54              
55             =head1 DESCRIPTION
56              
57             Bio::DasI is a simplified alternative interface to sequence annotation
58             databases used by the distributed annotation system (see
59             L<http://biodas.org>). In this scheme, the genome is represented as a series of
60             features, a subset of which are named. Named features can be used as
61             reference points for retrieving "segments" (see L<Bio::Das::SegmentI>),
62             and these can, in turn, be used as the basis for exploring the genome
63             further.
64              
65             In addition to a name, each feature has a "class", which is
66             essentially a namespace qualifier and a "type", which describes what
67             type of feature it is. Das uses the GO consortium's ontology of
68             feature types, and so the type is actually an object of class
69             Bio::Das::FeatureTypeI (see L<Bio::Das::FeatureTypeI>). Bio::DasI
70             provides methods for interrogating the database for the types it
71             contains and the counts of each type.
72              
73             =head1 FEEDBACK
74              
75             =head2 Mailing Lists
76              
77             User feedback is an integral part of the evolution of this and other
78             Bioperl modules. Send your comments and suggestions preferably to one
79             of the Bioperl mailing lists. Your participation is much appreciated.
80              
81             bioperl-l@bioperl.org
82              
83             =head2 Support
84              
85             Please direct usage questions or support issues to the mailing list:
86              
87             I<bioperl-l@bioperl.org>
88              
89             rather than to the module maintainer directly. Many experienced and
90             responsive experts will be able to look at the problem and quickly
91             address it. Please include a thorough description of the problem
92             with code and data examples if at all possible.
93              
94             =head2 Reporting Bugs
95              
96             Report bugs to the Bioperl bug tracking system to help us keep track
97             of the bugs and their resolution. Bug reports can be submitted via the web:
98              
99             https://github.com/bioperl/bioperl-live/issues
100              
101             =head1 AUTHOR - Lincoln Stein
102              
103             Email lstein@cshl.org
104              
105             =head1 APPENDIX
106              
107             The rest of the documentation details each of the object
108             methods. Internal methods are usually preceded with a _
109              
110             =cut
111              
112             #'
113             # Let the code begin...
114              
115             package Bio::DasI;
116 3     3   18 use strict;
  3         3  
  3         69  
117              
118 3     3   12 use Bio::Das::SegmentI;
  3         3  
  3         63  
119             # Object preamble - inherits from Bio::Root::RootI and Bio::SeqFeature::CollectionI
120 3     3   9 use base qw(Bio::Root::RootI Bio::SeqFeature::CollectionI);
  3         3  
  3         882  
121              
122             =head2 new
123              
124             Title : new
125             Usage : Bio::DasI->new(@args)
126             Function: Create new Bio::DasI object
127             Returns : a Bio::DasI object
128             Args : see below
129              
130             The new() method creates a new object. The argument list is either a
131             single argument consisting of a connection string, or the following
132             list of -name=E<gt>value arguments:
133              
134             Argument Description
135             -------- -----------
136              
137             -dsn Connection string for database
138             -adaptor Name of an adaptor class to use when connecting
139             -aggregator Array ref containing list of aggregators
140             "semantic mappers" to apply to database
141             -user Authentication username
142             -pass Authentication password
143              
144             Implementors of DasI may add other arguments.
145              
146             =cut
147              
148 0     0 1 0 sub new {shift->throw_not_implemented}
149              
150             =head2 types
151              
152             Title : types
153             Usage : $db->types(@args)
154             Function: return list of feature types in database
155             Returns : a list of Bio::Das::FeatureTypeI objects
156             Args : see below
157              
158             This routine returns a list of feature types known to the database. It
159             is also possible to find out how many times each feature occurs.
160              
161             Arguments are -option=E<gt>value pairs as follows:
162              
163             -enumerate if true, count the features
164              
165             The returned value will be a list of Bio::Das::FeatureTypeI objects
166             (see L<Bio::Das::FeatureTypeI>).
167              
168             If -enumerate is true, then the function returns a hash (not a hash
169             reference) in which the keys are the stringified versions of
170             Bio::Das::FeatureTypeI and the values are the number of times each
171             feature appears in the database.
172              
173             =cut
174              
175 0     0 1 0 sub types { shift->throw_not_implemented; }
176              
177             =head2 parse_types
178              
179             Title : parse_types
180             Usage : $db->parse_types(@args)
181             Function: parses list of types
182             Returns : an array ref containing ['method','source'] pairs
183             Args : a list of types in 'method:source' form
184             Status : internal
185              
186             This method takes an array of type names in the format "method:source"
187             and returns an array reference of ['method','source'] pairs. It will
188             also accept a single argument consisting of an array reference with
189             the list of type names.
190              
191             =cut
192              
193             # turn feature types in the format "method:source" into a list of [method,source] refs
194             sub parse_types {
195 562     562 1 572 my $self = shift;
196 562 100 66     1384 return [] if !@_ or !defined($_[0]);
197 491 100 100     848 return $_[0] if ref $_[0] eq 'ARRAY' && ref $_[0][0];
198 481 100       827 my @types = ref($_[0]) ? @{$_[0]} : @_;
  35         69  
199 481         562 my @type_list = map { [split(':',$_,2)] } @types;
  1917         3526  
200 481         899 return \@type_list;
201             }
202              
203             =head2 segment
204              
205             Title : segment
206             Usage : $db->segment(@args);
207             Function: create a segment object
208             Returns : segment object(s)
209             Args : see below
210              
211             This method generates a Bio::Das::SegmentI object (see
212             L<Bio::Das::SegmentI>). The segment can be used to find overlapping
213             features and the raw sequence.
214              
215             When making the segment() call, you specify the ID of a sequence
216             landmark (e.g. an accession number, a clone or contig), and a
217             positional range relative to the landmark. If no range is specified,
218             then the entire region spanned by the landmark is used to generate the
219             segment.
220              
221             Arguments are -option=E<gt>value pairs as follows:
222              
223             -name ID of the landmark sequence.
224              
225             -class A namespace qualifier. It is not necessary for the
226             database to honor namespace qualifiers, but if it
227             does, this is where the qualifier is indicated.
228              
229             -version Version number of the landmark. It is not necessary for
230             the database to honor versions, but if it does, this is
231             where the version is indicated.
232              
233             -start Start of the segment relative to landmark. Positions
234             follow standard 1-based sequence rules. If not specified,
235             defaults to the beginning of the landmark.
236              
237             -end End of the segment relative to the landmark. If not specified,
238             defaults to the end of the landmark.
239              
240             The return value is a list of Bio::Das::SegmentI objects. If the method
241             is called in a scalar context and there is no more than one segment
242             that satisfies the request, then it is allowed to return the segment.
243             Otherwise, the method must throw a "multiple segment exception".
244              
245             =cut
246              
247             #'
248              
249 0     0 1   sub segment { shift->throw_not_implemented }
250              
251             =head2 features
252              
253             Title : features
254             Usage : $db->features(@args)
255             Function: get all features, possibly filtered by type
256             Returns : a list of Bio::SeqFeatureI objects
257             Args : see below
258             Status : public
259              
260             This routine will retrieve features in the database regardless of
261             position. It can be used to return all features, or a subset based on
262             their type
263              
264             Arguments are -option=E<gt>value pairs as follows:
265              
266             -types List of feature types to return. Argument is an array
267             of Bio::Das::FeatureTypeI objects or a set of strings
268             that can be converted into FeatureTypeI objects.
269              
270             -callback A callback to invoke on each feature. The subroutine
271             will be passed each Bio::SeqFeatureI object in turn.
272              
273             -attributes A hash reference containing attributes to match.
274              
275             The -attributes argument is a hashref containing one or more attributes
276             to match against:
277              
278             -attributes => { Gene => 'abc-1',
279             Note => 'confirmed' }
280              
281             Attribute matching is simple exact string matching, and multiple
282             attributes are ANDed together. See L<Bio::DB::ConstraintsI> for a
283             more sophisticated take on this.
284              
285             If one provides a callback, it will be invoked on each feature in
286             turn. If the callback returns a false value, iteration will be
287             interrupted. When a callback is provided, the method returns undef.
288              
289             =cut
290              
291 0     0 1   sub features { shift->throw_not_implemented }
292              
293             =head2 get_feature_by_name
294              
295             Title : get_feature_by_name
296             Usage : $db->get_feature_by_name(-class=>$class,-name=>$name)
297             Function: fetch features by their name
298             Returns : a list of Bio::SeqFeatureI objects
299             Args : the class and name of the desired feature
300             Status : public
301              
302             This method can be used to fetch named feature(s) from the database.
303             The -class and -name arguments have the same meaning as in segment(),
304             and the method also accepts the following short-cut forms:
305              
306             1) one argument: the argument is treated as the feature name
307             2) two arguments: the arguments are treated as the class and name
308             (note: this uses _rearrange() so the first argument must not
309             begin with a hyphen or it will be interpreted as a named
310             argument).
311              
312             This method may return zero, one, or several Bio::SeqFeatureI objects.
313             The implementor may allow the name to contain wildcards, in which case
314             standard C-shell glob semantics are expected.
315              
316             =cut
317              
318             sub get_feature_by_name {
319 0     0 1   shift->throw_not_implemented();
320             }
321              
322             =head2 get_feature_by_target
323              
324             Title : get_feature_by_target
325             Usage : $db->get_feature_by_target($class => $name)
326             Function: fetch features by their similarity target
327             Returns : a list of Bio::SeqFeatureI objects
328             Args : the class and name of the desired feature
329             Status : public
330              
331             This method can be used to fetch a named feature from the database
332             based on its similarity hit. The arguments are the same as
333             get_feature_by_name(). If this is not implemented, the interface
334             defaults to using get_feature_by_name().
335              
336             =cut
337              
338             sub get_feature_by_target {
339 0     0 1   shift->get_feature_by_name(@_);
340             }
341              
342             =head2 get_feature_by_id
343              
344             Title : get_feature_by_id
345             Usage : $db->get_feature_by_id($id)
346             Function: fetch a feature by its ID
347             Returns : a Bio::SeqFeatureI object
348             Args : the ID of the feature
349             Status : public
350              
351             If the database provides unique feature IDs, this can be used to
352             retrieve a single feature from the database. If not overridden, this
353             interface calls get_feature_by_name() and returns the first element.
354              
355             =cut
356              
357             sub get_feature_by_id {
358 0     0 1   (shift->get_feature_by_name(@_))[0];
359             }
360              
361             =head2 get_feature_by_attribute
362              
363             Title : get_feature_by_attribute
364             Usage : $db->get_feature_by_attribute(attribute1=>value1,attribute2=>value2)
365             Function: fetch features by combinations of attribute values
366             Returns : a list of Bio::SeqFeatureI objects
367             Args : a list of attribute=E<gt>value pairs to match
368             Status : public
369              
370             This method can be used to fetch a set of features from the database.
371             Attributes are a list of name=E<gt>value pairs. They will be
372             logically ANDed together. If an attribute value is an array
373             reference, the list of values in the array is treated as an
374             alternative set of values to be ORed together.
375              
376             =cut
377              
378             sub get_feature_by_attribute {
379 0     0 1   shift->throw_not_implemented();
380             }
381              
382              
383             =head2 search_notes
384              
385             Title : search_notes
386             Usage : $db->search_notes($search_term,$max_results)
387             Function: full-text search on features, ENSEMBL-style
388             Returns : an array of [$name,$description,$score]
389             Args : see below
390             Status : public
391              
392             This routine performs a full-text search on feature attributes (which
393             attributes depend on implementation) and returns a list of
394             [$name,$description,$score], where $name is the feature ID,
395             $description is a human-readable description such as a locus line, and
396             $score is the match strength.
397              
398             Since this is a decidedly non-standard thing to do (but the generic
399             genome browser uses it), the default method returns an empty list.
400             You do not have to implement it.
401              
402             =cut
403              
404 0     0 1   sub search_notes { return }
405              
406             =head2 get_seq_stream
407              
408             Title : get_seq_stream
409             Usage : $seqio = $db->get_seq_stream(@args)
410             Function: Performs a query and returns an iterator over it
411             Returns : a Bio::SeqIO stream capable of returning Bio::SeqFeatureI objects
412             Args : As in features()
413             Status : public
414              
415             This routine takes the same arguments as features(), but returns a
416             Bio::SeqIO::Stream-compliant object. Use it like this:
417              
418             $stream = $db->get_seq_stream('exon');
419             while (my $exon = $stream->next_seq) {
420             print $exon,"\n";
421             }
422              
423             NOTE: In the interface this method is aliased to get_feature_stream(),
424             as the name is more descriptive.
425              
426             =cut
427              
428 0     0 1   sub get_seq_stream { shift->throw_not_implemented }
429 0     0 0   sub get_feature_stream {shift->get_seq_stream(@_) }
430              
431             =head2 refclass
432              
433             Title : refclass
434             Usage : $class = $db->refclass
435             Function: returns the default class to use for segment() calls
436             Returns : a string
437             Args : none
438             Status : public
439              
440             For data sources which use namespaces to distinguish reference
441             sequence accessions, this returns the default namespace (or "class")
442             to use. This interface defines a default of "Accession".
443              
444             =cut
445              
446 0     0 1   sub refclass { "Accession" }
447              
448             1;