File Coverage

blib/lib/Bio/DB/Das/Chado.pm
Criterion Covered Total %
statement 42 678 6.1
branch 0 318 0.0
condition 0 131 0.0
subroutine 14 60 23.3
pod 32 38 84.2
total 88 1225 7.1


line stmt bran cond sub pod time code
1             # $Id: Chado.pm,v 1.11 2009-08-25 19:29:43 scottcain Exp $
2              
3             =head1 NAME
4              
5             Bio::DB::Das::Chado - DAS-style access to a chado database
6              
7             =head1 SYNOPSIS
8              
9             # Open up a feature database
10             $db = Bio::DB::Das::Chado->new(
11             -dsn => 'dbi:Pg:dbname=gadfly;host=lajolla'
12             -user => 'jimbo',
13             -pass => 'supersecret',
14             );
15              
16             @segments = $db->segment(-name => '2L',
17             -start => 1,
18             -end => 1000000);
19              
20             # segments are Bio::Das::SegmentI - compliant objects
21              
22             # fetch a list of features
23             @features = $db->features(-type=>['type1','type2','type3']);
24              
25             # invoke a callback over features
26             $db->features(-type=>['type1','type2','type3'],
27             -callback => sub { ... }
28             );
29              
30             # get all feature types
31             @types = $db->types;
32              
33             # count types
34             %types = $db->types(-enumerate=>1);
35              
36             @feature = $db->get_feature_by_name($class=>$name);
37             @feature = $db->get_feature_by_target($target_name);
38             @feature = $db->get_feature_by_attribute($att1=>$value1,$att2=>$value2);
39             $feature = $db->get_feature_by_id($id);
40              
41             $error = $db->error;
42              
43             =head1 DESCRIPTION
44              
45             Bio::DB::Das::Chado allows DAS style access to a Chado database, getting
46             SeqFeatureI-compliant BioPerl objects and allowing GBrowse to access
47             a Chado database directly.
48              
49             =head1 FEEDBACK
50              
51             =head2 Mailing Lists
52              
53             User feedback is an integral part of the evolution of this and other
54             GMOD modules. Send your comments and suggestions preferably to one
55             of the GMOD mailing lists. Your participation is much appreciated.
56              
57             gmod-gbrowse@lists.sourceforge.com
58              
59             =head2 Reporting Bugs
60              
61             Report bugs to the GMOD bug tracking system at SourceForge to help us keep
62             track the bugs and their resolution.
63              
64             http://sourceforge.net/tracker/?group_id=27707&atid=391291
65              
66              
67             =head1 AUTHOR
68              
69             Scott Cain <scain@cpan.org>
70              
71             =head1 LICENSE
72              
73             This software may be redistributed under the same license as perl.
74              
75             =head1 APPENDIX
76              
77             The rest of the documentation details each of the object
78             methods. Internal methods are usually preceded with a _
79              
80             =cut
81              
82             #'
83              
84             package Bio::DB::Das::Chado;
85 1     1   27346 use strict;
  1         2  
  1         52  
86              
87 1     1   1023 use Bio::DB::Das::Chado::Segment;
  1         3  
  1         48  
88 1     1   31 use Bio::Root::Root;
  1         1  
  1         23  
89 1     1   1042 use Bio::DasI;
  1         2018  
  1         37  
90 1     1   8 use Bio::PrimarySeq;
  1         2  
  1         31  
91 1     1   5 use Bio::DB::GFF::Typename;
  1         2  
  1         27  
92 1     1   4598 use DBI;
  1         21941  
  1         88  
93 1     1   1255 use Bio::SeqFeature::Lite;
  1         5796  
  1         54  
94 1     1   8 use Carp qw(longmess);
  1         3  
  1         73  
95 1     1   7 use vars qw($VERSION @ISA);
  1         3  
  1         60  
96              
97 1     1   6 use Data::Dumper;
  1         3  
  1         54  
98              
99 1     1   7 use constant SEGCLASS => 'Bio::DB::Das::Chado::Segment';
  1         3  
  1         73  
100 1     1   5 use constant MAP_REFERENCE_TYPE => 'MapReferenceType'; #dgg
  1         3  
  1         44  
101 1     1   5 use constant DEBUG => 0;
  1         2  
  1         11009  
102              
103             $VERSION = 0.34;
104             @ISA = qw(Bio::Root::Root Bio::DasI);
105              
106             =head2 new
107              
108             Title : new
109             Usage : $db = Bio::DB::Das::Chado(
110             -dsn => 'dbi:Pg:dbname=gadfly;host=lajolla'
111             -user => 'jimbo',
112             -pass => 'supersecret',
113             );
114              
115             Function: Open up a Bio::DB::DasI interface to a Chado database
116             Returns : a new Bio::DB::Das::Chado object
117             Args :
118              
119             =over
120              
121             =item -dsn [dsn string]
122              
123             A full dbi dsn string for the database, optionally including host and port
124             information, like "dbi:Pg:dbname=chado;host=localhost;port=5432".
125              
126             =item -user [username]
127              
128             The database user name.
129              
130             =item -pass [password]
131              
132             The users password for the database.
133              
134             =item -organism [common_name|abbreviation|"Genus species"]
135              
136             Used to specify the organism that the features should be drawn from in
137             Chado instances that have more than one organism. The argument can be
138             the common name, the abbreviation or "Genus species". Since common name
139             and abbreviation are not guaranteed to be unique, if one of those is supplied
140             and it corresponds to more than one organism_id, the Chado adaptor will die.
141             Since the combination is guaranteed to be unique by table constraints,
142             supplying "Genus species" should always work.
143              
144             =item -srcfeatureslice [1|0] default: 1
145              
146             Setting this to 1 will enable searching for features using a function and
147             a corresponding index that can significantly speed searches, as long as
148             the featureloc_slice function is present in the Chado instance (all
149             "modern" instances of Chado do have this function). Since it available
150             in nearly all Chado instances, in a future release of this adaptor,
151             the default value of -srcfeatureslice will be set to 1 (on).
152              
153             =item -inferCDS [1|0] default: 0
154              
155             Given mRNA features that have exons and polypeptide features as children,
156             when inferCDS is set, the Chado adaptor will calculate the intersection
157             of the exons and polypeptide features and create CDS features that result.
158             This is generally needed when using gene and mRNA features with glyphs in
159             GBrowse that show subparts, like the gene and processed_transcript glyphs.
160             Since this is almost always required, in a future release of this adaptor,
161             the default will be switched to 1 (on).
162              
163             =item -fulltext [1|0] default: 0
164              
165             This item allows full text searching of various Chado text fields,
166             including feature.name, feature.uniquename, synonym.synonym_sgml,
167             dbxref.accession, and all_feature_names.name (which fequently includes
168             featureprop.value, depending on how all_feature_names is configured). Note
169             that to use -fulltext, you must run the preparation script,
170             gmod_chado_fts_prep.pl, on the database, and in addition, it might
171             be a good idea to set up a cronjob to keep the all_feature_names
172             materialized view up to date with the materialized view tool,
173             gmod_materialized_view_tool.pl.
174              
175             =item -recursivMapping [1|0] default: 0
176              
177             In the case where features are mapped to a "small" srcfeature (like
178             a contig) and then that small feature is mapped to a larger feature
179             (like a chromosome), setting -recursivMapping will allow the Chado
180             adaptor to calculate the coordinates of the feature on the larger
181             feature even though it isn't explicitly mapped to it. The Chado adaptor
182             suffers an approximately 20% performance penalty to do this mapping.
183              
184             =item -allow_obsolete [1|0] default: 0
185              
186             If set to 1, allow_obsolete will tell the Chado adaptor to ignore the
187             feature.is_obsolete column when querying to find features.
188              
189             =item -enable_seqscan [1|0] default: 1
190              
191             If set to zero, the -enable_seqscan will send a query planner hint to the
192             PostgreSQL server to make it more costly to do sequential scans on a table.
193             This is generally not necessary, as the query planner in Pg 8+ is smarter
194             than it used to be.
195              
196             =item -do2Level [1|0] default: 0
197              
198             do2Level is a flag for specifying that two "levels" at most of features should
199             be fetch when getting child features. This flag is generally unnecessary as
200             Bio::Graphics::Glyph supports specifying on a per glyph basis what should
201             be fetch. Use of this flag is incompatible with the -recursivMapping flag.
202              
203             =item -reference_class [SO type name]
204              
205             Used to specify what the "base type" is. Typically, this would be chromosome
206             or contig, but setting it is only necessary in the case where features
207             are mapped to more than one srcfeature and you don't want to use the
208             one that is lowest on the graph. For example, you have polypeptides that are
209             mapped to chromosomes and motifs that are mapped to polypeptides. If you
210             want to display the motifs on the polypeptide, you need to set "polypeptide"
211             as the argument for -reference_class.
212              
213             =item -tripal [1|0] default: 0
214              
215             If turned on, the tripal flag tells the adaptor that it is dealing with
216             a Chado instance that is working with Tripal, and so the query to fetch
217             features may fail with regard to analysis features. This flag attempts to
218             prevent that. It may mean that analysis features (like similarity results)
219             will be inaccessible to the adaptor, or at least scores associated with them
220             will be, depending on how they were loaded.
221              
222             =back
223              
224             =cut
225              
226             # create new database accessor object
227             # takes all the same args as a Bio::DB::BioDB class
228             sub new {
229 0     0 1   my $proto = shift;
230 0   0       my $self = bless {}, ref($proto) || $proto;
231              
232 0           my %arg = @_;
233              
234 0           my $dsn = $arg{-dsn};
235 0           my $username = $arg{-user};
236 0           my $password = $arg{-pass};
237 0           my $refclass = $arg{-reference_class};
238 0           my $tripal = $arg{-tripal};
239              
240 0           $self->{db_args}->{dsn} = $dsn;
241 0           $self->{db_args}->{username} = $username;
242 0           $self->{db_args}->{password} = $password;
243              
244 0 0         my $dbh = DBI->connect( $dsn, $username, $password )
245             or $self->throw("unable to open db handle");
246 0           $self->dbh($dbh);
247              
248 0           warn "$dbh\n" if DEBUG;
249              
250             # determine which cv to use for SO terms
251              
252 0           $self->sofa_id(1);
253              
254 0           warn "SOFA id to use: ",$self->sofa_id() if DEBUG;
255              
256             # get the cvterm relationships here and save for later use
257              
258 0           my $cvterm_query="select ct.cvterm_id,ct.name as name, c.name as cvname
259             from cvterm ct, cv c
260             where ct.cv_id=c.cv_id and
261             (c.name IN (
262             'relationship',
263             'relationship type','Relationship Ontology',
264             'autocreated')
265             OR c.cv_id = ?)";
266              
267 0           warn "cvterm query: $cvterm_query\n" if DEBUG;
268              
269 0 0         my $sth = $self->dbh->prepare($cvterm_query)
270             or warn "unable to prepare select cvterms";
271              
272 0 0         $sth->execute($self->sofa_id()) or $self->throw("unable to select cvterms");
273              
274             # my $cvterm_id = {}; replaced with better-named variables
275             # my $cvname = {};
276              
277 0           my(%term2name,%name2term) = ({},{});
278 0           my %termcv=();
279            
280 0           while (my $hashref = $sth->fetchrow_hashref) {
281 0           $term2name{ $hashref->{cvterm_id} } = $hashref->{name};
282 0           $termcv{ $hashref->{cvterm_id} } = $hashref->{cvname}; # dgg
283            
284             #this addresses a bug in gmod_load_gff3 (Scott!), which creates a 'part_of'
285             #term in addition to the OBO_REL one that already exists! this will also
286             #help with names that exist in both GO and SO, like 'protein'.
287             # dgg: but this array is bad for callers of name2term() who expect scalar result
288             # mostly want only sofa terms
289            
290 0 0         if(defined($name2term{ $hashref->{name} })){ #already seen this name
291              
292 0 0         if(ref($name2term{ $hashref->{name} }) ne 'ARRAY'){ #already array-converted
293              
294 0           $name2term{ $hashref->{name} } = [ $name2term{ $hashref->{name} } ];
295              
296             }
297              
298 0           push @{ $name2term{ $hashref->{name} } }, $hashref->{cvterm_id};
  0            
299              
300             } else {
301              
302 0           $name2term{ $hashref->{name} } = $hashref->{cvterm_id};
303              
304             }
305             }
306 0           $sth->finish;
307              
308 0           $self->term2name(\%term2name);
309 0           $self->name2term(\%name2term, \%termcv);
310              
311             #Recursive Mapping
312 0 0         $self->recursivMapping($arg{-recursivMapping} ? $arg{-recursivMapping} : 0);
313              
314 0 0         $self->inferCDS($arg{-inferCDS} ? $arg{-inferCDS} : 0);
315 0 0         $self->allow_obsolete($arg{-allow_obsolete} ? $arg{-allow_obsolete} : 0);
316              
317 0 0 0       if (exists($arg{-enable_seqscan}) && ! $arg{-enable_seqscan}){
318 0           $self->dbh->do("set enable_seqscan=0");
319             }
320              
321 0 0         $self->srcfeatureslice(defined $arg{-srcfeatureslice} ? $arg{-srcfeatureslice} : 1);
322 0 0         $self->do2Level($arg{-do2Level} ? $arg{-do2Level} : 0);
323              
324 0 0         if ($arg{-organism}) {
325 0           $self->organism_id($arg{-organism});
326             }
327              
328             #determine if all_feature_names view or table exist
329             #$self->use_all_feature_names();
330              
331             #determine the type_id of the ref class and cache it
332 0           $self->refclass($self->name2term($refclass));
333              
334 0           $self->fulltext($arg{-fulltext});
335 0           $self->tripal($arg{-tripal});
336              
337 0           return $self;
338             }
339              
340             =head2 feature_summary
341              
342             =over
343              
344             =item Usage
345              
346             $obj->feature_summary()
347              
348             =item Function
349              
350             This function is based on Bio::DB::SeqFeature::Store->feature_summary.
351             The text that follows comes from it's documtation:
352              
353             This method is used to get coverage density information across a
354             region of interest. You provide it with a region of interest, optional
355             a list of feature types, and a count of the number of bins over which
356             you want to calculate the coverage density. An object is returned
357             corresponding to the requested region. It contains a tag called
358             "coverage" that will return an array ref of "bins" length. Each
359             element of the array describes the number of features that overlap the
360             bin at this postion.
361              
362             Note that this method uses an approximate algorithm that is only
363             accurate to 500 bp, so when dealing with bins that are smaller than
364             1000 bp, you may see some shifting of counts between adjacent bins.
365              
366             Although an -iterator option is provided, the method only ever returns
367             a single feature, so this is fairly useless.
368              
369             =item Returns
370              
371             A single feature containing summary data, or an interator containing
372             that one feature.
373              
374             =item Arguments
375              
376             -seq_id Sequence ID for the region
377             -start Start of region
378             -end End of region
379             -type/-types Feature type of interest or array ref of types
380             -bins Number of bins across region. Defaults to 1000.
381             -iterator Return an iterator across the region
382              
383             =back
384              
385             =cut
386              
387             sub feature_summary {
388 0     0 1   my $self = shift;
389 0           my ($seq_name,$seq_id,$ref,$start,$stop,$end,$types,$type,$primary_tag,$bins,$iterator) =
390             $self->_rearrange(['SEQID','SEQ_ID','REF','START','STOP','END',
391             'TYPES','TYPE','PRIMARY_TAG',
392             'BINS',
393             'ITERATOR',
394             ],@_);
395              
396 0   0       $seq_name ||=$seq_id ||=$ref;
      0        
397 0   0       $end ||=$end;
398 0   0       $types ||=$type ||=$primary_tag;
      0        
399              
400 0           warn $types if DEBUG;
401              
402 0 0         my ($coverage,$tag) = $self->coverage_array(-seqid=> $seq_name,
403             -start=> $start,
404             -end => $end,
405             -type => $types,
406             -bins => $bins) or return;
407 0           my $score = 0;
408 0           for (@$coverage) { $score += $_ }
  0            
409 0           $score /= @$coverage;
410              
411 0           my $feature = Bio::SeqFeature::Lite->new(-seq_id => $seq_name,
412             -start => $start,
413             -end => $end,
414             -type => $tag,
415             -score => $score,
416             -attributes =>
417             { coverage => [$coverage] });
418              
419 0           my @features = ($feature);
420 0 0         return $iterator
421             ? Bio::DB::Das::ChadoIterator->new(\@features)
422             : $feature;
423             }
424              
425              
426             =head2 coverage_array
427              
428             =over
429              
430             =item Usage
431              
432             $obj->coverage_array()
433              
434             =item Function
435              
436             Calculates the coverage/density of a particular feature type
437             over a range.
438              
439             =item Returns
440              
441             A reference to the coverage array, or if called in an array
442             context, a two element array with the reference to the coverage
443             array first and the type that it was called with as the second
444             element.
445              
446             =item Arguments
447              
448             seqid
449             start
450             stop
451             type
452             bins
453              
454             =back
455              
456             This is based on the method of the same name in
457             Bio::DB::SeqFeature::Store::DBI::mysql
458              
459             =cut
460              
461             sub coverage_array {
462 0     0 1   my $self = shift;
463 0           my ($seq_name,$seq_id,$ref,$start,$end,$stop,$types,$type,$primary_tag,$bins) =
464             $self->_rearrange(['SEQID','SEQ_ID','REF','START','STOP','END',
465             'TYPES','TYPE','PRIMARY_TAG','BINS'],@_);
466              
467 0   0       $seq_name ||= $seq_id ||= $ref;
      0        
468 0   0       $types ||= $type ||= $primary_tag;
      0        
469 0   0       $end ||= $stop;
470              
471 0           my $summary_bin_size = 1000;
472 0   0       $bins ||= 1000;
473 0   0       $start ||= 1;
474 0 0         my $segment = $self->segment(-name =>$seq_name) or $self->throw("unknown seq_id $seq_name");
475 0   0       $end ||= $segment->end;
476            
477 0           my $binsize = ($end-$start+1)/$bins;
478 0           my $seqid = $segment->feature_id;
479              
480 0           warn "$seqid in coverage array" if DEBUG;
481              
482 0 0         return [] unless $seqid;
483              
484             # where each bin starts
485 0           my @his_bin_array = map {$start + $binsize * $_} (0..$bins);
  0            
486 0           my @sum_bin_array = map {int(($_-1)/$summary_bin_size)} @his_bin_array;
  0            
487              
488 0           my $interval_stats = 'gff_interval_stats';
489            
490             # pick up the type ids
491              
492             #WARNING: potential bug below. This query and the loop that processes
493             #it is from Lincoln's implementation for SeqFeature::Store. The query
494             #seems to rely on getting the results back sorted even though the
495             #query doesn't explicitly sort (the ORDER BY commented out was from me)
496             #With sorting the processing takes much longer, so I'm leaving it out
497             #for now, but reimplementing might be a good idea.
498              
499 0           my %bins;
500 0           my $sql = <<END;
501             SELECT bin,cum_count
502             FROM $interval_stats
503             WHERE (typeid=? OR typeid like ? ) AND bin >=? AND srcfeature_id =?
504             -- ORDER BY bin
505             LIMIT 1
506             END
507             ;
508              
509 0           my $sth = $self->dbh->prepare($sql);
510              
511 0           my @t;
512 0 0         if (ref $types eq 'ARRAY') {
513 0           @t = @$types;
514             }
515             else {
516 0           @t = ($types);
517             }
518              
519 0           warn join(" ", @t) . " types in coverage array" if DEBUG;
520              
521 0           eval {
522 0           for my $typeid (@t) {
523 0           my $typestr = $self->_types_sql($typeid);
524              
525 0           warn "$typestr typestr in coverage array" if DEBUG;
526              
527 0           for (my $i=0;$i<@sum_bin_array;$i++) {
528              
529 0           my @args = ($typestr,$typestr,$sum_bin_array[$i],$seqid);
530              
531 0 0         $sth->execute(@args) or $self->throw($sth->errstr);
532 0           my ($bin,$cum_count) = $sth->fetchrow_array;
533 0           push @{$bins{$typeid}},[$bin,$cum_count];
  0            
534             }
535             }
536             };
537              
538              
539 0 0         return unless %bins;
540              
541 0           my @tags;
542             my @merged_bins;
543 0           my $firstbin = int(($start-1)/$binsize);
544 0           for my $type (keys %bins) {
545 0           push @tags, $type;
546 0           my $arry = $bins{$type};
547 0           my $last_count = $arry->[0][1];
548 0           my $last_bin = -1;
549 0           my $i = 0;
550 0           my $delta;
551 0           for my $b (@$arry) {
552 0           my ($bin,$count) = @$b;
553 0 0         $delta = $count - $last_count if $bin > $last_bin;
554 0           $merged_bins[$i++] = $delta;
555 0           $last_count = $count;
556 0           $last_bin = $bin;
557             }
558             }
559              
560 0           my $report_tag = join(",",@tags);
561 0 0         return wantarray ? (\@merged_bins,$report_tag) : \@merged_bins;
562             }
563              
564              
565             sub _types_sql {
566 0     0     my $self = shift;
567 0           my $type = shift;
568 0           my ($primary_tag,$source_tag,$typestr);
569              
570 0 0 0       if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
571 0           $primary_tag = $type->method;
572 0           $source_tag = $type->source;
573             } else {
574 0           ($primary_tag,$source_tag) = split ':',$type,2;
575             }
576              
577 0 0         if (defined $source_tag) {
578 0 0         if (length($primary_tag)) {
579 0           $typestr = "$primary_tag:$source_tag";
580             }
581             else {
582 0           $typestr = "%:$source_tag";
583             }
584             } else {
585 0           $typestr = "$primary_tag:%";
586             }
587              
588 0           return ($typestr);
589             }
590              
591             =head2 tripal
592              
593             =over
594              
595             =item Usage
596              
597             $obj->tripal() #get existing value
598             $obj->tripal($newval) #set new value
599              
600             =item Function
601              
602             Flag to identfy Chado database that are working with Tripal
603              
604             =item Returns
605              
606             value of tripal (a scalar)
607              
608             =item Arguments
609              
610             new value of tripal (to set)
611              
612             =back
613              
614             =cut
615              
616             sub tripal {
617 0     0 1   my $self = shift;
618 0 0         my $tripal = shift if defined(@_);
619 0 0         return $self->{'tripal'} = $tripal if defined($tripal);
620 0           return $self->{'tripal'};
621             }
622              
623              
624              
625             =head2 fulltext
626              
627             =over
628              
629             =item Usage
630              
631             $obj->fulltext() #get existing value
632             $obj->fulltext($newval) #set new value
633              
634             =item Function
635              
636             Flag to govern the use of full text searching queries
637              
638             =item Returns
639              
640             value of fulltext (a scalar)
641              
642             =item Arguments
643              
644             new value of fulltext (to set)
645              
646             =back
647              
648             =cut
649              
650             sub fulltext {
651 0     0 1   my $self = shift;
652 0 0         my $fulltext = shift if defined(@_);
653 0 0         return $self->{'fulltext'} = $fulltext if defined($fulltext);
654 0           return $self->{'fulltext'};
655             }
656              
657              
658             =head2 refclass
659              
660             =over
661              
662             =item Usage
663              
664             $obj->refclass() #get existing value
665             $obj->refclass($newval) #set new value
666              
667             =item Function
668              
669             =item Returns
670              
671             value of the reference class's cvterm_id (a scalar)
672              
673             =item Arguments
674              
675             new value of the reference class's cvterm_id (to set)
676              
677             =back
678              
679             =cut
680              
681             sub refclass {
682 0     0 1   my $self = shift;
683 0 0         my $refclass = shift if defined(@_);
684 0 0         return $self->{'refclass'} = $refclass if defined($refclass);
685 0           return $self->{'refclass'};
686             }
687              
688              
689             =head2 use_all_feature_names
690              
691             Title : use_all_feature_names
692             Usage : $obj->use_all_feature_names()
693             Function: set or return flag indicating that all_feature_names view is present
694             Returns : 1 if all_feature_names present, 0 if not
695             Args : to return the flag, none; to set, 1
696              
697              
698             =cut
699              
700             sub use_all_feature_names {
701 0     0 1   my ($self, $flag) = @_;
702              
703 0 0         return $self->{use_all_feature_names} = $flag
704             if defined($flag);
705 0 0         return $self->{use_all_feature_names}
706             if defined $self->{use_all_feature_names};
707              
708             #now determine if either a view or table named all_feature_names is present
709 0           my $query
710             = "SELECT relkind FROM pg_class WHERE relname = 'all_feature_names'";
711              
712 0           my $exists = $self->dbh->prepare($query);
713 0 0         $exists->execute or warn "all_feature_names query failed: $!";
714              
715 0           my ($kind) = $exists->fetchrow_array;
716 0 0 0       if ($kind and ($kind eq 'r' or $kind eq 'v')) {
    0 0        
717 0           $self->{use_all_feature_names} = 1;
718             }
719             elsif ($kind) {
720 0           warn "all_feature_names: This option shouldn't happen--setting use_all_feature_names to zero.";
721 0           $self->{use_all_feature_names} = 0;
722             }
723             else {
724 0           $self->{use_all_feature_names} = 0;
725             }
726 0           $exists->finish;
727              
728 0           return $self->{use_all_feature_names};
729             }
730              
731             =head2 organism_id
732              
733             Title : organism_id
734             Usage : $obj->organism_id()
735             Function: set or return the organism_id
736             Returns : the value of the id
737             Args : to return the flag, none; to set, the common name of the organism
738              
739             If -organism is set when the Chado feature is instantiated, this method
740             queries the database with the common name to cache the organism_id.
741              
742             =cut
743              
744             sub organism_id {
745 0     0 1   my $self = shift;
746 0           my $organism_name = shift;
747              
748 0 0         if (!$organism_name) {
749 0           return $self->{'organism_id'};
750             }
751              
752 0           my $dbh = $self->dbh;
753              
754             #if there is a space in the name, check genus species
755 0 0         if ($organism_name =~ /(\S+?)\s+(.+)/) {
756 0           my $genus = $1;
757 0           my $species = $2;
758 0           my $species_query = $dbh->prepare("SELECT organism_id FROM organism WHERE genus = ? and species =
759             ?");
760 0 0         $species_query->execute($genus, $species) or die "organism genus species query failed:$!";
761              
762             #don't need to check for multiple rows because of unique constraint
763 0 0         if ($species_query->rows == 1) {
764 0           my($organism_id) = $species_query->fetchrow_array;
765              
766 0 0         if ($organism_id) {
767 0           return $self->{'organism_id'} = $organism_id;
768             }
769              
770             }
771             }
772              
773             #check common name
774 0           my $org_query = $dbh->prepare("SELECT organism_id FROM organism WHERE common_name = ?");
775              
776 0 0         $org_query->execute($organism_name) or die "organism query failed:$!";
777              
778             #if more than one result for common name, croak
779 0 0         if ($org_query->rows > 1) {
    0          
780 0           $self->throw("The common organism name, $organism_name, is present more than once in the organism table; please use a more precice representation of the organism.");
781             }
782             elsif ($org_query->rows == 0 ) {
783             #no--don't do anything here--let it go on to check other things
784             #$self->throw("There is no organism in the organism table with a common name '$organism_name'; please check the spelling.");
785             }
786             else {
787 0           my($organism_id) = $org_query->fetchrow_array;
788              
789 0 0         if ($organism_id) {
790 0           return $self->{'organism_id'} = $organism_id;
791             }
792             }
793 0           $org_query->finish;
794              
795             #check abbrev
796 0           my $abbrev_query = $dbh->prepare("SELECT organism_id FROM organism WHERE abbreviation = ?");
797              
798 0 0         $abbrev_query->execute($organism_name) or die "organism abbrev query failed:$!";
799              
800 0 0         if ($abbrev_query->rows > 1) {
    0          
801 0           $self->throw("The abbreviated organism name, $organism_name, is present more than once in the organism table; please use a more precice representation of the organism.");
802             }
803             elsif ($abbrev_query->rows == 0) {
804             #do nothing in case another check is added after this one
805             }
806             else {
807 0           my($organism_id) = $abbrev_query->fetchrow_array;
808              
809 0 0         if ($organism_id) {
810 0           return $self->{'organism_id'} = $organism_id;
811             }
812             }
813              
814 0           $self->throw("Tried everything to get an organism_id for '$organism_name' but failed; try 'genus species'");
815 0           return; #of course, this return will never get used
816             }
817              
818              
819              
820             =head2 inferCDS
821              
822             Title : inferCDS
823             Usage : $obj->inferCDS()
824             Function: set or return the inferCDS flag
825             Returns : the value of the inferCDS flag
826             Args : to return the flag, none; to set, 1
827              
828             Often, chado databases will be populated without CDS features, since
829             they can be inferred from a union of exons and polypeptide features.
830             Setting this flag tells the adaptor to do the inferrence to get
831             those derived CDS features (at some small performance penatly).
832              
833             =cut
834              
835             sub inferCDS {
836 0     0 1   my $self = shift;
837              
838 0           my $flag = shift;
839 0 0         return $self->{inferCDS} = $flag if defined($flag);
840 0           return $self->{inferCDS};
841             }
842              
843             =head2 allow_obsolete
844              
845             Title : allow_obsolete
846             Usage : $obj->allow_obsolete()
847             Function: set or return the allow_obsolete flag
848             Returns : the value of the allow_obsolete flag
849             Args : to return the flag, none; to set, 1
850              
851             The chado feature table has a flag column called 'is_obsolete'.
852             Normally, these features should be ignored by GBrowse, but
853             the -allow_obsolete method is provided to allow displaying
854             obsolete features.
855              
856             =cut
857              
858             sub allow_obsolete {
859 0     0 1   my $self = shift;
860 0 0         my $allow_obsolete = shift if defined(@_);
861 0 0         return $self->{'allow_obsolete'} = $allow_obsolete if defined($allow_obsolete);
862 0           return $self->{'allow_obsolete'};
863             }
864              
865              
866             =head2 sofa_id
867              
868             Title : sofa_id
869             Usage : $obj->sofa_id()
870             Function: get or return the ID to use for SO terms
871             Returns : the cv.cv_id for the SO ontology to use
872             Args : to return the id, none; to determine the id, 1
873              
874             =cut
875              
876             sub sofa_id {
877 0     0 1   my $self = shift;
878 0 0         return $self->{'sofa_id'} unless @_;
879              
880 0           my $query = "select cv_id from cv where name in (
881             'SOFA',
882             'Sequence Ontology Feature Annotation',
883             'sofa.ontology')";
884              
885 0           my $sth = $self->dbh->prepare($query);
886 0 0         $sth->execute() or $self->throw("trying to find SOFA");
887              
888 0           my $data = $sth->fetchrow_hashref();
889 0           my $sofa_id = $$data{'cv_id'};
890              
891 0           $sth->finish;
892 0 0         return $self->{'sofa_id'} = $sofa_id if $sofa_id;
893              
894 0           $query = "select cv_id from cv where name in (
895             'Sequence Ontology',
896             'sequence',
897             'SO')";
898              
899 0           $sth = $self->dbh->prepare($query);
900 0 0         $sth->execute() or $self->throw("trying to find SO");
901              
902 0           $data = $sth->fetchrow_hashref();
903 0           $sofa_id = $$data{'cv_id'};
904              
905 0           $sth->finish;
906 0 0         return $self->{'sofa_id'} = $sofa_id if $sofa_id;
907              
908 0           $self->throw("unable to find SO or SOFA in the database!");
909             }
910              
911             =head2 recursivMapping
912              
913             Title : recursivMapping
914             Usage : $obj->recursivMapping($newval)
915             Function: Flag for activating the recursive mapping (desactivated by default)
916             Returns : value of recursivMapping (a scalar)
917             Args : on set, new value (a scalar or undef, optional)
918              
919             Goal : When we have a clone mapped on a chromosome, the recursive mapping maps the features of the clone on the chromosome.
920              
921             =cut
922              
923             sub recursivMapping{
924 0     0 1   my $self = shift;
925              
926 0 0         return $self->{'recursivMapping'} = shift if @_;
927 0           return $self->{'recursivMapping'};
928             }
929              
930             =head2 srcfeatureslice
931              
932             Title : srcfeatureslice
933             Usage : $obj->srcfeatureslice
934             Function: Flag for activating
935             Returns : value of srcfeatureslice
936             Args : on set, new value (a scalar or undef, optional)
937             Desc : Allows to use a featureslice of type featureloc_slice(srcfeat_id, int, int)
938             Important : this and recursivMapping are mutually exclusives
939              
940             =cut
941              
942             sub srcfeatureslice{
943 0     0 1   my $self = shift;
944 0 0         return $self->{'srcfeatureslice'} = shift if @_;
945 0           return $self->{'srcfeatureslice'};
946             }
947              
948             =head2 do2Level
949              
950             Title : do2Level
951             Usage : $obj->do2Level
952             Function: Flag for activating the fetching of 2levels in segment->features
953             Returns : value of do2Level
954             Args : on set, new value (a scalar or undef, optional)
955              
956             =cut
957              
958             sub do2Level{
959 0     0 1   my $self = shift;
960 0 0         return $self->{'do2Level'} = shift if @_;
961 0           return $self->{'do2Level'};
962             }
963              
964              
965             =head2 dbh
966              
967             Title : dbh
968             Usage : $obj->dbh($newval)
969             Function:
970             Returns : value of dbh (a scalar)
971             Args : on set, new value (a scalar or undef, optional)
972              
973              
974             =cut
975              
976             sub dbh {
977 0     0 1   my $self = shift;
978              
979 0 0         return $self->{'dbh'} = shift if @_;
980 0 0         return $self->{'dbh'} if defined ($self->{'dbh'});
981              
982             #uh oh, there isn't already a dbh object, try to create one
983 0           my $dsn = $self->{db_args}->{dsn};
984 0           my $username = $self->{db_args}->{username};
985 0           my $password = $self->{db_args}->{password};
986              
987 0 0         my $dbh = DBI->connect( $dsn, $username, $password )
988             or $self->throw("unable to open db handle");
989 0           $self->{'dbh'} = $dbh;
990              
991 0 0 0       if (exists($self->{-enable_seqscan}) && ! $self->{-enable_seqscan}){
992 0           $dbh->do("set enable_seqscan=0");
993             }
994              
995 0           return $self->{'dbh'};
996             }
997              
998             =head2 term2name
999              
1000             Title : term2name
1001             Usage : $obj->term2name($newval)
1002             Function: When called with a hashref, sets cvterm.cvterm_id to cvterm.name
1003             mapping hashref; when called with an int, returns the name
1004             corresponding to that cvterm_id; called with no arguments, returns
1005             the hashref.
1006             Returns : see above
1007             Args : on set, a hashref; to retrieve a name, an int; to retrieve the
1008             hashref, none.
1009              
1010             Note: should be replaced by Bio::GMOD::Util->term2name
1011              
1012             =cut
1013              
1014             sub term2name {
1015 0     0 1   my $self = shift;
1016 0           my $arg = shift;
1017              
1018 0 0         if(ref($arg) eq 'HASH'){
    0          
1019 0           return $self->{'term2name'} = $arg;
1020             } elsif($arg) {
1021 0           return $self->{'term2name'}{$arg};
1022             } else {
1023 0           return $self->{'term2name'};
1024             }
1025             }
1026              
1027              
1028             =head2 name2term
1029              
1030             Title : name2term
1031             Usage : $obj->name2term($newval)
1032             Function: When called with a hashref, sets cvterm.name to cvterm.cvterm_id
1033             mapping hashref; when called with a string, returns the cvterm_id
1034             corresponding to that name; called with no arguments, returns
1035             the hashref.
1036             Returns : see above
1037             Args : on set, a hashref; to retrieve a cvterm_id, a string; to retrieve
1038             the hashref, none.
1039              
1040             Note: Should be replaced by Bio::GMOD::Util->name2term
1041              
1042             =cut
1043              
1044             sub name2term {
1045 0     0 1   my $self = shift;
1046 0           my $arg = shift;
1047 0           my $cvnames = shift;
1048              
1049 0 0         if(ref($cvnames) eq 'HASH'){ $self->{'termcvs'} = $cvnames; }
  0            
1050 0 0         if(ref($arg) eq 'HASH'){
    0          
1051 0           return $self->{'name2term'} = $arg;
1052             } elsif($arg) {
1053 0           return $self->{'name2term'}{$arg};
1054              
1055             #rather than trying to guess what a caller wants, the caller will have
1056             #deal with what comes... (ie, a scalar or a hash).
1057             # my $val= $self->{'name2term'}{$arg};
1058             # if(ref($val)) {
1059             # #? use $cvnames scalar here to pick which cv?
1060             # my @val= @$val;
1061             # foreach $val (@val) {
1062             # my $cv= $self->{'termcvs'}{$val};
1063             # return $val if($cv =~ /^(SO|sequence)/i); # want sofa_id
1064             # }
1065             # return $val[0]; #? 1st is best guess
1066             # }
1067             # return $val;
1068              
1069             } else {
1070 0           return $self->{'name2term'};
1071             }
1072             }
1073              
1074             =head2 segment
1075              
1076             Title : segment
1077             Usage : $db->segment(@args);
1078             Function: create a segment object
1079             Returns : segment object(s)
1080             Args : see below
1081              
1082             This method generates a Bio::Das::SegmentI object (see
1083             L<Bio::Das::SegmentI>). The segment can be used to find overlapping
1084             features and the raw sequence.
1085              
1086             When making the segment() call, you specify the ID of a sequence
1087             landmark (e.g. an accession number, a clone or contig), and a
1088             positional range relative to the landmark. If no range is specified,
1089             then the entire region spanned by the landmark is used to generate the
1090             segment.
1091              
1092             Arguments are -option=E<gt>value pairs as follows:
1093              
1094             -name ID of the landmark sequence.
1095              
1096             -class A namespace qualifier. It is not necessary for the
1097             database to honor namespace qualifiers, but if it
1098             does, this is where the qualifier is indicated.
1099              
1100             -version Version number of the landmark. It is not necessary for
1101             the database to honor versions, but if it does, this is
1102             where the version is indicated.
1103              
1104             -start Start of the segment relative to landmark. Positions
1105             follow standard 1-based sequence rules. If not specified,
1106             defaults to the beginning of the landmark.
1107              
1108             -end End of the segment relative to the landmark. If not specified,
1109             defaults to the end of the landmark.
1110              
1111             The return value is a list of Bio::Das::SegmentI objects. If the method
1112             is called in a scalar context and there are no more than one segments
1113             that satisfy the request, then it is allowed to return the segment.
1114             Otherwise, the method must throw a "multiple segment exception".
1115              
1116             =cut
1117              
1118             sub segment {
1119 0     0 1   my $self = shift;
1120 0           my ($name,$base_start,$stop,$end,$class,$version,$db_id,$feature_id,$srcfeature_id)
1121             = $self->_rearrange([qw(NAME
1122             START
1123             STOP
1124             END
1125             CLASS
1126             VERSION
1127             DB_ID
1128             FEATURE_ID
1129             SRCFEATURE_ID )],@_);
1130             # lets the Segment class handle all the lifting.
1131              
1132 0   0       $end ||= $stop;
1133 0           return $self->_segclass->new($name,$self,$base_start,$end,$db_id,0,$feature_id,$srcfeature_id);
1134             }
1135              
1136             =head2 features
1137              
1138             Title : features
1139             Usage : $db->features(@args)
1140             Function: get all features, possibly filtered by type
1141             Returns : a list of Bio::SeqFeatureI objects
1142             Args : see below
1143             Status : public
1144              
1145             This routine will retrieve features in the database regardless of
1146             position. It can be used to return all features, or a subset based on
1147             their type
1148              
1149             Arguments are -option=E<gt>value pairs as follows:
1150              
1151             -type List of feature types to return. Argument is an array
1152             of Bio::Das::FeatureTypeI objects or a set of strings
1153             that can be converted into FeatureTypeI objects.
1154              
1155             -callback A callback to invoke on each feature. The subroutine
1156             will be passed each Bio::SeqFeatureI object in turn.
1157              
1158             -attributes A hash reference containing attributes to match.
1159              
1160             The -attributes argument is a hashref containing one or more attributes
1161             to match against:
1162              
1163             -attributes => { Gene => 'abc-1',
1164             Note => 'confirmed' }
1165              
1166             Attribute matching is simple exact string matching, and multiple
1167             attributes are ANDed together.
1168              
1169             If one provides a callback, it will be invoked on each feature in
1170             turn. If the callback returns a false value, iteration will be
1171             interrupted. When a callback is provided, the method returns undef.
1172              
1173             =cut
1174              
1175             sub features {
1176 0     0 1   my $self = shift;
1177 0           my ($type,$types,$callback,$attributes,$iterator,$feature_id,$seq_id,$start,$end) =
1178             $self->_rearrange([qw(TYPE TYPES CALLBACK ATTRIBUTES ITERATOR FEATURE_ID SEQ_ID START END)],
1179             @_);
1180              
1181 0   0       $type ||= $types; #GRRR
1182              
1183 0           warn "Chado,features: $type\n" if DEBUG;
1184 0           my @features = $self->_segclass->features(-type => $type,
1185             -attributes => $attributes,
1186             -callback => $callback,
1187             -iterator => $iterator,
1188             -factory => $self,
1189             -feature_id=>$feature_id,
1190             -seq_id =>$seq_id,
1191             -start =>$start,
1192             -end =>$end,
1193             );
1194 0           return @features;
1195             }
1196              
1197             sub get_seq_stream {
1198 0     0 1   my $self = shift;
1199             #warn "get_seq_stream args:@_";
1200 0           my ($type,$types,$callback,$attributes,$iterator,$feature_id,$seq_id,$start,$end) =
1201             $self->_rearrange([qw(TYPE TYPES CALLBACK ATTRIBUTES ITERATOR FEATURE_ID SEQ_ID START END)],
1202             @_);
1203              
1204 0           my @features = $self->_segclass->features(-type => $type,
1205             -attributes => $attributes,
1206             -callback => $callback,
1207             -iterator => $iterator,
1208             -factory => $self,
1209             -feature_id=>$feature_id,
1210             -seq_id =>$seq_id,
1211             -start =>$start,
1212             -end =>$end,
1213             );
1214              
1215 0           return Bio::DB::Das::ChadoIterator->new(\@features);
1216              
1217              
1218             }
1219              
1220             =head2 types
1221              
1222             Title : types
1223             Usage : $db->types(@args)
1224             Function: return list of feature types in database
1225             Returns : a list of Bio::Das::FeatureTypeI objects
1226             Args : see below
1227              
1228             This routine returns a list of feature types known to the database. It
1229             is also possible to find out how many times each feature occurs.
1230              
1231             Arguments are -option=E<gt>value pairs as follows:
1232              
1233             -enumerate if true, count the features
1234              
1235             The returned value will be a list of Bio::Das::FeatureTypeI objects
1236             (see L<Bio::Das::FeatureTypeI>.
1237              
1238             If -enumerate is true, then the function returns a hash (not a hash
1239             reference) in which the keys are the stringified versions of
1240             Bio::Das::FeatureTypeI and the values are the number of times each
1241             feature appears in the database.
1242              
1243             NOTE: This currently raises a "not-implemented" exception, as the
1244             BioSQL API does not appear to provide this functionality.
1245              
1246             =cut
1247              
1248             sub types {
1249 0     0 1   my $self = shift;
1250 0           my ($enumerate) = $self->_rearrange([qw(ENUMERATE)],@_);
1251 0           $self->throw_not_implemented;
1252             #if lincoln didn't need to implement it, neither do I!
1253             }
1254              
1255             =head2 get_feature_by_alias, get_features_by_alias
1256              
1257             Title : get_features_by_alias
1258             Usage : $db->get_feature_by_alias(@args)
1259             Function: return list of feature whose name or synonyms match
1260             Returns : a list of Bio::Das::Chado::Segment::Feature objects
1261             Args : See below
1262              
1263             This method finds features matching the criteria outlined by the
1264             supplied arguments. Wildcards (*) are allowed. Valid arguments are:
1265              
1266             =over
1267              
1268             =item -name
1269              
1270             =item -class
1271              
1272             =item -ref (refrence sequence)
1273              
1274             =item -start
1275              
1276             =item -end
1277              
1278             =back
1279              
1280             =cut
1281              
1282              
1283             sub get_feature_by_alias {
1284 0     0 1   my $self = shift;
1285 0           my @args = @_;
1286              
1287 0 0         if ( @args == 1 ) {
1288 0           @args = (-name => $args[0]);
1289             }
1290              
1291 0           push @args, -operation => 'by_alias';
1292              
1293 0           return $self->_by_alias_by_name(@args);
1294             }
1295              
1296             *get_features_by_alias = \&get_feature_by_alias;
1297              
1298             =head2 get_feature_by_name, get_features_by_name
1299              
1300             Title : get_features_by_name
1301             Usage : $db->get_features_by_name(@args)
1302             Function: return list of feature whose names match
1303             Returns : a list of Bio::Das::Chado::Segment::Feature objects
1304             Args : See below
1305              
1306             This method finds features matching the criteria outlined by the
1307             supplied arguments. Wildcards (*) are allowed. Valid arguments are:
1308              
1309             =over
1310              
1311             =item -name
1312              
1313             =item -class
1314              
1315             =item -ref (refrence sequence)
1316              
1317             =item -start
1318              
1319             =item -end
1320              
1321             =back
1322              
1323             =cut
1324              
1325              
1326             *get_features_by_name = \&get_feature_by_name;
1327              
1328             sub get_feature_by_name {
1329 0     0 1   my $self = shift;
1330 0           my @args = @_;
1331              
1332 0           warn "in get_feature_by_name, args:@args" if DEBUG;
1333              
1334 0 0         if ( @args == 1 ) {
1335 0           @args = (-name => $args[0]);
1336             }
1337              
1338 0           push @args, -operation => 'by_name';
1339              
1340 0           return $self->_by_alias_by_name(@args);
1341             }
1342              
1343             =head2 _by_alias_by_name
1344              
1345             Title : _by_alias_by_name
1346             Usage : $db->_by_alias_by_name(@args)
1347             Function: return list of feature whose names match
1348             Returns : a list of Bio::Das::Chado::Segment::Feature objects
1349             Args : See below
1350              
1351             A private method that implements the get_features_by_name and
1352             get_features_by_alias methods. It accepts the same args as
1353             those methods, plus an addtional on (-operation) which is
1354             either 'by_alias' or 'by_name' to indicate what rule it is to
1355             use for finding features.
1356              
1357             =cut
1358              
1359             sub _by_alias_by_name {
1360 0     0     my $self = shift;
1361              
1362 0           my ($name, $class, $ref, $base_start, $stop, $operation)
1363             = $self->_rearrange([qw(NAME CLASS REF START END OPERATION)],@_);
1364              
1365 0 0         if ($name =~ /^id:(\d+)/) {
1366 0           my $feature_id = $1;
1367 0           return $self->get_feature_by_feature_id($feature_id);
1368             }
1369              
1370 0           my @temp_array = split /:/, $name;
1371 0 0         if (scalar @temp_array == 2) {
1372 0 0         if ($self->source2dbxref($temp_array[0]) > 0) {
1373 0           warn "assuming that the name with a colon ($name) is coming from a multiple hit search result (ie, is of the form 'source:name'";
1374 0           $name = $temp_array[1];
1375             }
1376             }
1377              
1378             ##I think this is where this should go...
1379             # We need to split the query on whitespaces, and replace the whitespace with &
1380             # so that we can get proper full test search on allquery terms [LP]
1381             # but it only make sense to do this for full text searching [Scott]
1382 0 0         $name = $self->_search_name_prep_spaces($name) if $self->fulltext;
1383              
1384              
1385 0           my $wildcard = 0;
1386 0 0         if ($name =~ /\*/) {
1387 0           $wildcard = 1;
1388 0           undef $class;
1389             }
1390              
1391 0           warn "name:$name in get_feature_by_name" if DEBUG;
1392              
1393             # $name = $self->_search_name_prep($name);
1394              
1395             # warn "name after protecting _ and % in the string:$name\n" if DEBUG;
1396              
1397 0           my (@features,$sth);
1398            
1399             # get feature_id
1400             # foreach feature_id, get the feature info
1401             # then get src_feature stuff (chromosome info) and create a parent feature,
1402              
1403 0           my ($select_part,$from_part,$where_part);
1404              
1405 0 0         if ($class) {
1406             #warn "class: $class";
1407 0 0 0       my $type = ($class eq 'CDS' && $self->inferCDS)
1408             ? $self->name2term('polypeptide')
1409             : $self->name2term($class);
1410 0 0         return unless $type;
1411              
1412 0 0         if (ref $type eq 'ARRAY') {
    0          
1413 0           $type = join(',',@$type);
1414             }
1415             elsif (ref $type eq 'HASH') {
1416 0           $type = join(',', map($$type{$_}, keys %$type) );
1417             }
1418 0           $from_part = " feature f ";
1419 0           $where_part.= " AND f.type_id in ( $type ) ";
1420             }
1421              
1422 0 0 0       if ($self->organism_id and $operation eq 'by_alias') {
    0          
1423 0 0         $where_part.= $self->use_all_feature_names()
1424             ? " AND afn.organism_id =".$self->organism_id
1425             : " AND f.organism_id =".$self->organism_id;
1426             }
1427             elsif ($self->organism_id) {
1428 0           $where_part.= " AND f.organism_id =".$self->organism_id;
1429             }
1430              
1431 0 0         if ( $operation eq 'by_alias') {
1432 0 0         if ($self->use_all_feature_names()) {
1433 0           $select_part = "select distinct afn.feature_id \n";
1434 0 0         $from_part = $from_part ?
1435             "$from_part join all_feature_names afn using (feature_id) "
1436             : "all_feature_names afn ";
1437              
1438 0           my $alias_only_where;
1439             # There is no difference in the wildcard or non-wildcard call to
1440             # the full-text search [LP]
1441 0 0         if ($self->fulltext) {
    0          
1442 0           $alias_only_where = "where afn.searchable_name @@ to_tsquery(?)";
1443             }
1444             elsif ($wildcard) {
1445 0           $alias_only_where = "where lower(afn.name) like ?";
1446             }
1447             else {
1448 0           $alias_only_where = "where lower(afn.name) = ?";
1449             }
1450              
1451 0 0         $where_part = $where_part ?
1452             "$alias_only_where $where_part"
1453             : $alias_only_where;
1454              
1455             }
1456             else { #need to use the synonym table
1457 0           $select_part = "select distinct fs.feature_id \n";
1458 0 0         $from_part = $from_part ?
1459             "$from_part join feature_synonym fs using (feature_id), synonym s "
1460             : "feature_synonym fs, synonym s ";
1461              
1462 0           my $alias_only_where;
1463             # Again, with full-text there's no difference in wildcard/non-wildcard [LP]
1464 0 0         if ($self->fulltext) {
    0          
1465 0           $alias_only_where = "where fs.synonym_id = s.synonym_id and\n"
1466             . "s.searchable_synonym_sgml @@ to_tsquery(?)";
1467             }
1468             elsif ($wildcard) {
1469 0           $alias_only_where = "where fs.synonym_id = s.synonym_id and\n"
1470             . "lower(s.synonym_sgml) like ?";
1471             }
1472             else {
1473 0           $alias_only_where = "where fs.synonym_id = s.synonym_id and\n"
1474             . "lower(s.synonym_sgml) = ?";
1475             }
1476              
1477              
1478 0 0         $where_part = $where_part ?
1479             "$alias_only_where $where_part"
1480             : $alias_only_where;
1481             }
1482             }
1483             else { #searching by name only
1484 0           $select_part = "select f.feature_id ";
1485 0           $from_part = " feature f ";
1486              
1487 0           my $name_only_where;
1488             # Using full text search we only need create one WHERE clause, regardless of
1489             # the presence of any wildcards... [LP]
1490 0 0         if ($self->fulltext) {
    0          
1491 0           $name_only_where = "where f.searchable_name @@ to_tsquery(?)";
1492             }
1493             elsif ($wildcard) {
1494 0           $name_only_where = "where lower(f.name) like ?";
1495             }
1496             else {
1497 0           $name_only_where = "where lower(f.name) = ?";
1498             }
1499              
1500              
1501 0 0         $where_part = $where_part ?
1502             "$name_only_where $where_part"
1503             : $name_only_where;
1504             }
1505              
1506 0           my $query = $select_part . ' FROM ' . $from_part . $where_part;
1507              
1508             # Added at suggestion of James Ward to strip confusing/fatal whitespace,
1509             # so we trim leading and trailing whitespace before processing query [LP]
1510 0           $query =~ s/^[ \t\r\n]+|[ \t\r\n]$//g;
1511              
1512              
1513 0           warn "first get_feature_by_name query:$query" if DEBUG;
1514              
1515 0           $sth = $self->dbh->prepare($query);
1516              
1517 0 0         if ($wildcard) {
1518 0           $name = $self->_search_name_prep($name);
1519 0           warn "name after protecting _ and % in the string:$name\n" if DEBUG;
1520             }
1521              
1522             # what the hell happened to the lower casing!!!
1523             # left over bug from making the adaptor case insensitive?
1524              
1525             #$name = lc($name);
1526            
1527 0 0         $sth->execute(lc($name)) or $self->throw("getting the feature_ids failed");
1528              
1529             # this makes performance awful! It does a wildcard search on a view
1530             # that has several selects in it. For any reasonably sized database,
1531             # this won't work.
1532             #
1533             # if ($sth->rows < 1 and
1534             # $class ne 'chromosome' and
1535             # $class ne 'region' and
1536             # $class ne 'contig') {
1537             #
1538             # my $query;
1539             # ($name,$query) = $self->_complex_search($name,$class,$wildcard);
1540             #
1541             # warn "complex_search query:$query\n";
1542             #
1543             # $sth = $self->dbh->prepare($query);
1544             # $sth->execute($name) or $self->throw("getting the feature_ids failed");
1545             #
1546             # }
1547              
1548              
1549             # prepare sql queries for use in while loops
1550              
1551 0           my $isth = $self->dbh->prepare("
1552             select f.feature_id, f.name, f.type_id,f.uniquename,af.significance as score,
1553             fl.fmin,fl.fmax,fl.strand,fl.phase, fl.srcfeature_id, fd.dbxref_id,
1554             f.is_obsolete,f.seqlen
1555             from feature f join featureloc fl using (feature_id)
1556             left join analysisfeature af using (feature_id)
1557             left join feature_dbxref fd using (feature_id)
1558             where
1559             f.feature_id = ? and fl.rank=0 and
1560             (fd.dbxref_id is null or fd.dbxref_id in
1561             (select dbxref_id from dbxref where db_id = ?))
1562             order by fl.srcfeature_id
1563             ");
1564              
1565 0           my $jsth = $self->dbh->prepare("select name from feature
1566             where feature_id = ?");
1567              
1568             # getting feature info
1569 0           while (my $feature_id_ref = $sth->fetchrow_hashref) {
1570              
1571 0           warn "feature_id in features method loop:".$$feature_id_ref{feature_id} if DEBUG;
1572              
1573 0 0         $isth->execute($$feature_id_ref{'feature_id'},$self->gff_source_db_id)
1574             or $self->throw("getting feature info failed");
1575              
1576 0 0         if ($isth->rows == 0) { #this might be a srcfeature
1577              
1578 0           warn "$name might be a srcfeature" if DEBUG;
1579              
1580 0           my $is_srcfeature_query = $self->dbh->prepare("
1581             select srcfeature_id from featureloc where srcfeature_id=? limit 1
1582             ");
1583 0 0         $is_srcfeature_query->execute($$feature_id_ref{'feature_id'})
1584             or $self->throw("checking if feature is a srcfeature failed");
1585              
1586 0           $sth->finish;
1587 0           $isth->finish;
1588 0           $jsth->finish;
1589 0 0         if ($is_srcfeature_query->rows == 1) {#yep, its a srcfeature
1590             #build a feature out of the srcfeature:
1591 0           warn "Yep, $name is a srcfeature" if DEBUG;
1592              
1593 0           my @args = ($name) ;
1594 0 0         push @args, $base_start if $base_start;
1595 0 0         push @args, $stop if $stop;
1596              
1597 0           warn "srcfeature args:$args[0]" if DEBUG;
1598              
1599 0           my @seg = ($self->segment(@args));
1600              
1601 0           $is_srcfeature_query->finish;
1602 0           return @seg;
1603             }
1604             else {
1605 0           $is_srcfeature_query->finish;
1606 0           return; #I got nothing!
1607             }
1608             }
1609              
1610             #getting chromosome info
1611 0           my $old_srcfeature_id=-1;
1612 0           my $parent_segment;
1613 0           while (my $hashref = $isth->fetchrow_hashref) {
1614              
1615 0 0 0       next if ($$hashref{'is_obsolete'} and !$self->allow_obsolete);
1616              
1617 0 0 0       if ($self->refclass && $$hashref{type_id} == $self->refclass) {
1618             #this feature is supposed to be a reference feature
1619 0           my $f = Bio::DB::Das::Chado::Segment->new($$hashref{'name'},
1620             $self,
1621             1,$$hashref{'seqlen'},
1622             $$hashref{'uniquename'},
1623             undef,
1624             $$hashref{'feature_id'},
1625             undef);
1626 0           push @features,$f;
1627 0           next;
1628             }
1629              
1630 0 0         if ($$hashref{'srcfeature_id'} != $old_srcfeature_id) {
1631 0 0         $jsth->execute($$hashref{'srcfeature_id'})
1632             or die ("getting assembly info failed");
1633 0           my $src_name = $jsth->fetchrow_hashref;
1634 0           warn "src_name:$$src_name{'name'}" if DEBUG;
1635 0           $parent_segment =
1636             Bio::DB::Das::Chado::Segment->new($$src_name{'name'},$self,undef,undef,undef,undef,$$hashref{'srcfeature_id'});
1637 0           $old_srcfeature_id=$$hashref{'srcfeature_id'};
1638             }
1639             #now build the feature
1640              
1641             #Recursive Mapping
1642 0 0         if ($self->{recursivMapping}){
1643             #Fetch the recursively mapped position
1644              
1645 0           my $sql = "select fl.fmin,fl.fmax,fl.strand,fl.phase
1646             from feat_remapping(?) fl
1647             where fl.rank=0";
1648 0           my $recurs_sth = $self->dbh->prepare($sql);
1649 0           $sql =~ s/\s+/ /gs ;
1650 0           $recurs_sth->execute($$feature_id_ref{'feature_id'});
1651 0           my $hashref2 = $recurs_sth->fetchrow_hashref;
1652 0           my $strand_ = $$hashref{'strand'};
1653 0           my $phase_ = $$hashref{'phase'};
1654 0           my $fmax_ = $$hashref{'fmax'};
1655 0           my $interbase_start;
1656              
1657             #If unable to recursively map we assume that the feature is
1658             # already mapped on the lowest refseq
1659              
1660 0 0         if ($recurs_sth->rows != 0){
1661 0           $interbase_start = $$hashref2{'fmin'};
1662 0           $strand_ = $$hashref2{'strand'};
1663 0           $phase_ = $$hashref2{'phase'};
1664 0           $fmax_ = $$hashref2{'fmax'};
1665             }else{
1666 0           $interbase_start = $$hashref{'fmin'};
1667             }
1668 0           $base_start = $interbase_start +1;
1669              
1670 0   0       my $type_obj = Bio::DB::GFF::Typename->new(
1671             $self->term2name($$hashref{type_id}),
1672             $self->dbxref2source($$hashref{dbxref_id}) || "");
1673              
1674 0           my $feat = Bio::DB::Das::Chado::Segment::Feature->new(
1675             $self,
1676             $parent_segment,
1677             $parent_segment->seq_id,
1678             $base_start,$fmax_,
1679             $self->term2name($$hashref{'type_id'}),
1680             $$hashref{'score'},
1681             $strand_,
1682             $phase_,
1683             $$hashref{'name'},
1684             $$hashref{'uniquename'},
1685             $$hashref{'feature_id'}
1686             );
1687 0           push @features, $feat;
1688 0           $recurs_sth->finish;
1689             #END Recursive Mapping
1690             } else {
1691            
1692 0 0 0       if ($class && $class eq 'CDS' && $self->inferCDS) {
      0        
1693             #$hashref holds info for the polypeptide
1694 0           my $poly_min = $$hashref{'fmin'};
1695 0           my $poly_max = $$hashref{'fmax'};
1696 0           my $poly_fid = $$hashref{'feature_id'};
1697              
1698             #get fid of parent transcript
1699 0           my $id_list = ref $self->term2name('derives_from') eq 'ARRAY'
1700 0 0         ? "in (".join(",",@{$self->term2name('derives_from')}).")"
1701             : "= ".$self->term2name('derives_from');
1702              
1703 0           my $transcript_query = $self->dbh->prepare("
1704             SELECT object_id FROM feature_relationship
1705             WHERE type_id ".$id_list
1706             ." AND subject_id = $poly_fid"
1707             );
1708              
1709 0           $transcript_query->execute;
1710 0           my ($trans_id) = $transcript_query->fetchrow_array;
1711              
1712 0           $id_list = ref $self->term2name('part_of') eq 'ARRAY'
1713 0 0         ? "in (".join(",",@{$self->term2name('part_of')}).")"
1714             : "= ".$self->term2name('part_of');
1715              
1716             #now get exons that are part of the transcript
1717 0           my $exon_query = $self->dbh->prepare("
1718             SELECT f.feature_id,f.name,f.type_id,f.uniquename,
1719             af.significance as score,fl.fmin,fl.fmax,fl.strand,
1720             fl.phase, fl.srcfeature_id, fd.dbxref_id,f.is_obsolete
1721             FROM feature f join featureloc fl using (feature_id)
1722             left join analysisfeature af using (feature_id)
1723             left join feature_dbxref fd using (feature_id)
1724             WHERE
1725             f.type_id = ".$self->term2name('exon')." and f.feature_id in
1726             (select subject_id from feature_relationship where object_id = $trans_id and
1727             type_id ".$id_list." ) and
1728             fl.rank=0 and
1729             (fd.dbxref_id is null or fd.dbxref_id in
1730             (select dbxref_id from dbxref where db_id =".$self->gff_source_db_id."))
1731             ");
1732              
1733 0           $exon_query->execute();
1734              
1735 0           while (my $exonref = $exon_query->fetchrow_hashref) {
1736 0 0         next if ($$exonref{fmax} < $poly_min);
1737 0 0         next if ($$exonref{fmin} > $poly_max);
1738 0 0 0       next if ($$exonref{is_obsolete} and !$self->allow_obsolete);
1739              
1740 0           my ($start,$stop);
1741 0 0 0       if ($$exonref{fmin} <= $poly_min && $$exonref{fmax} >= $poly_max) {
1742             #the exon starts before polypeptide start
1743 0           $start = $poly_min +1;
1744             }
1745             else {
1746 0           $start = $$exonref{fmin} +1;
1747             }
1748              
1749 0 0 0       if ($$exonref{fmax} >= $poly_max && $$exonref{fmin} <= $poly_min) {
1750 0           $stop = $poly_max;
1751             }
1752             else {
1753 0           $stop = $$exonref{fmax};
1754             }
1755              
1756 0   0       my $type_obj = Bio::DB::GFF::Typename->new(
1757             'CDS',
1758             $self->dbxref2source($$hashref{'dbxref_id'}) || '');
1759              
1760              
1761 0           my $feat = Bio::DB::Das::Chado::Segment::Feature->new(
1762             $self,
1763             $parent_segment,
1764             $parent_segment->seq_id,
1765             $start,$stop,
1766             $type_obj,
1767             $$hashref{'score'},
1768             $$hashref{'strand'},
1769             $$hashref{'phase'},
1770             $$hashref{'name'},
1771             $$hashref{'uniquename'},
1772             $$hashref{'feature_id'}
1773             );
1774 0           push @features, $feat;
1775             }
1776 0           $exon_query->finish;
1777 0           $transcript_query->finish;
1778             }
1779             else {
1780             #the normal case where you don't infer CDS features
1781 0           my $interbase_start = $$hashref{'fmin'};
1782 0           $base_start = $interbase_start +1;
1783              
1784 0   0       my $type_obj = Bio::DB::GFF::Typename->new(
1785             $self->term2name($$hashref{'type_id'}),
1786             $self->dbxref2source($$hashref{'dbxref_id'}) || '');
1787              
1788 0 0         my $srcf = 1 if ($self->refclass() == $$hashref{'type_id'}) ;
1789            
1790 0 0         my $feat = Bio::DB::Das::Chado::Segment::Feature->new(
    0          
1791             $self,
1792             $srcf ? '' : $parent_segment,
1793             $srcf ? '' : $parent_segment->seq_id,
1794             $base_start,$$hashref{'fmax'},
1795             $type_obj,
1796             $$hashref{'score'},
1797             $$hashref{'strand'},
1798             $$hashref{'phase'},
1799             $$hashref{'name'},
1800             $$hashref{'uniquename'},
1801             $$hashref{'feature_id'}
1802             );
1803              
1804 0           push @features, $feat;
1805             }
1806             }
1807             }
1808             }
1809 0           $sth->finish;
1810 0           $isth->finish;
1811 0           $jsth->finish;
1812 0           return @features;
1813             }
1814              
1815             # Handle spaces in search query; we need to avoid replacing
1816             # ' & ' with ' & & & ', though... [LP]
1817             sub _search_name_prep_spaces {
1818 0     0     my $self = shift;
1819 0           my $name = shift;
1820              
1821 0           $name =~ s/\s&\s/ /g; # Replace any user-defined ' & ' with spaces...
1822 0           $name =~ s/\s/ & /g; # then replace all spaces with ' & '
1823              
1824 0           return $name;
1825             }
1826              
1827              
1828             *fetch_feature_by_name = \&get_feature_by_name;
1829              
1830             sub get_feature_by_feature_id {
1831 0     0 0   my $self = shift;
1832 0           my $f_id = shift;
1833              
1834 0           my @features = $self->features(-feature_id => $f_id);
1835 0           return @features;
1836             }
1837              
1838             sub get_feature_by_id {
1839 0     0 1   my $self = shift;
1840 0           my $f_id = shift;
1841              
1842 0           my @features = $self->features(-feature_id => $f_id);
1843 0           return $features[0];
1844             }
1845              
1846             *fetch = *get_feature_by_primary_id = \&get_feature_by_feature_id;
1847              
1848             sub _complex_search {
1849 0     0     my $self = shift;
1850 0           my $name = shift;
1851 0           my $class= shift;
1852              
1853 0           warn "name before wildcard subs:$name\n" if DEBUG;
1854              
1855 0 0         $name = "\%$name" unless (0 == index($name, "%"));
1856 0 0         $name = "$name%" unless (0 == index(reverse($name), "%"));
1857              
1858 0           warn "name after wildcard subs:$name\n" if DEBUG;
1859              
1860 0           my $select_part = "select ga.feature_id ";
1861 0           my $from_part = "from gffatts ga ";
1862 0           my $where_part = "where lower(ga.attribute) like ? ";
1863            
1864 0 0         if ($class) {
1865 0           my $type = $self->name2term($class);
1866 0 0         return unless $type;
1867 0           $from_part .= ", feature f ";
1868 0           $where_part.= "and ga.feature_id = f.feature_id and "
1869             ."f.type_id = $type";
1870             }
1871              
1872 0 0         $where_part .= " and organism_id = ".$self->organism_id
1873             if $self->organism_id;
1874            
1875 0           my $query = $select_part . $from_part . $where_part;
1876 0           return ($name, $query);
1877             }
1878              
1879             sub _search_name_prep {
1880 0     0     my $self = shift;
1881 0           my $name = shift;
1882              
1883 0 0         if ($self->fulltext) {
1884              
1885             # For full-text search, the appropriate extension wildcard
1886             # is ':*' for prefix-matching. There are limitations to
1887             # full-text search in that we cannot find internal parts of
1888             # words, so wildcards can only come at the ends of phrases/
1889             # lexemes. Internal * are converted by tsquery into & [LP]
1890 0           $name =~ s/_/\\_/g; # escape underscores in name
1891 0           $name =~ s/(?<=\s)\*//g; # lose prefix wildcards (word start)
1892 0           $name =~ s/(?<=^)\*//g; # lose prefix wildcards (query start)
1893 0           $name =~ s/\*(?=$)/:\*/g; # convert trailing * (query end) into :*
1894 0           $name =~ s/\*(?=\s)/:\*/g; # convert trailing * (word end) into :*
1895              
1896             }
1897             else {
1898 0           $name =~ s/_/\\_/g; # escape underscores in name
1899 0           $name =~ s/\%/\\%/g; # ditto for percent signs
1900              
1901 0           $name =~ s/\*/%/g;
1902             }
1903              
1904 0           return $name;
1905             }
1906              
1907              
1908             =head2 srcfeature2name
1909              
1910             returns a srcfeature name given a srcfeature_id
1911              
1912             =cut
1913              
1914             sub srcfeature2name {
1915 0     0 1   my $self = shift;
1916 0           my $id = shift;
1917              
1918 0 0         return $self->{'srcfeature_id'}->{$id} if $self->{'srcfeature_id'}->{$id};
1919              
1920 0           my $sth = $self->dbh->prepare("select name from feature "
1921             ."where feature_id = ?");
1922 0           $sth->execute($id);
1923              
1924 0           my $hashref = $sth->fetchrow_hashref;
1925 0           $self->{'srcfeature_id'}->{$id} = $$hashref{'name'};
1926              
1927 0           $sth->finish;
1928 0           return $self->{'srcfeature_id'}->{$id};
1929             }
1930              
1931             =head2 gff_source_db_id
1932              
1933             Title : gff_source_db_id
1934             Function: caches the chado db_id from the chado db table
1935              
1936             =cut
1937              
1938             sub gff_source_db_id {
1939 0     0 1   my $self = shift;
1940 0 0         return $self->{'gff_source_db_id'} if $self->{'gff_source_db_id'};
1941              
1942 0           my $sth = $self->dbh->prepare("
1943             select db_id from db
1944             where name = 'GFF_source'");
1945 0           $sth->execute();
1946              
1947 0           my $hashref = $sth->fetchrow_hashref;
1948 0           $self->{'gff_source_db_id'} = $$hashref{'db_id'};
1949              
1950 0           $sth->finish;
1951 0           return $self->{'gff_source_db_id'};
1952             }
1953              
1954             =head2 gff_source_dbxref_id
1955              
1956             Gets dbxref_id for features that have a gff source associated
1957              
1958             =cut
1959              
1960             sub source2dbxref {
1961 0     0 0   my $self = shift;
1962 0           my $source = shift;
1963              
1964             #Why was this here? Debugging?
1965             #return 'fake' unless defined($self->gff_source_db_id);
1966              
1967 0 0         return $self->{'source_dbxref'}->{$source}
1968             if $self->{'source_dbxref'}->{$source};
1969              
1970 0           my $sth = $self->dbh->prepare("
1971             select dbxref_id,accession from dbxref where db_id= ?"
1972             );
1973 0           $sth->execute($self->gff_source_db_id);
1974              
1975 0           while (my $hashref = $sth->fetchrow_hashref) {
1976 0           warn "s2d:accession:$$hashref{accession}, dbxref_id:$$hashref{dbxref_id}\n" if DEBUG;
1977              
1978 0           $self->{'source_dbxref'}->{$$hashref{accession}} = $$hashref{dbxref_id};
1979 0           $self->{'dbxref_source'}->{$$hashref{dbxref_id}} = $$hashref{accession};
1980             }
1981              
1982 0           $sth->finish;
1983 0           return $self->{'source_dbxref'}->{$source};
1984             }
1985              
1986             =head2 dbxref2source
1987              
1988             returns the source (string) when given a dbxref_id
1989              
1990             =cut
1991              
1992             sub dbxref2source {
1993 0     0 1   my $self = shift;
1994 0           my $dbxref = shift;
1995              
1996 0 0         return '.' unless defined($self->gff_source_db_id);
1997              
1998 0           warn "d2s:dbxref:$dbxref\n" if DEBUG;
1999              
2000 0 0 0       if (defined ($self->{'dbxref_source'}) && $dbxref
      0        
2001             && defined ($self->{'dbxref_source'}->{$dbxref})) {
2002 0           return $self->{'dbxref_source'}->{$dbxref};
2003             }
2004              
2005 0           my $sth = $self->dbh->prepare("
2006             select dbxref_id,accession from dbxref where db_id=?"
2007             );
2008 0           $sth->execute($self->gff_source_db_id);
2009              
2010 0 0         if ($sth->rows < 1) {
2011 0           $sth->finish;
2012 0           return ".";
2013             }
2014              
2015 0           while (my $hashref = $sth->fetchrow_hashref) {
2016 0           warn "d2s:accession:$$hashref{accession}, dbxref_id:$$hashref{dbxref_id}\n"
2017             if DEBUG;
2018              
2019 0           $self->{'source_dbxref'}->{$$hashref{accession}} = $$hashref{dbxref_id};
2020 0           $self->{'dbxref_source'}->{$$hashref{dbxref_id}} = $$hashref{accession};
2021             }
2022            
2023 0           $sth->finish;
2024 0 0 0       if (defined $self->{'dbxref_source'} && $dbxref
      0        
2025             && defined $self->{'dbxref_source'}->{$dbxref}) {
2026 0           return $self->{'dbxref_source'}->{$dbxref};
2027             } else {
2028 0 0         $self->{'dbxref_source'}->{$dbxref} = "." if $dbxref;
2029 0           return ".";
2030             }
2031             }
2032              
2033             =head2 source_dbxref_list
2034              
2035             Title : source_dbxref_list
2036             Usage : @all_dbxref_ids = $db->source_dbxref_list()
2037             Function: Gets a list of all dbxref_ids that are used for GFF sources
2038             Returns : a comma delimited string that is a list of dbxref_ids
2039             Args : none
2040             Status : public
2041              
2042             This method queries the database for all dbxref_ids that are used
2043             to store GFF source terms.
2044              
2045             =cut
2046              
2047             sub source_dbxref_list {
2048 0     0 1   my $self = shift;
2049 0 0         return $self->{'source_dbxref_list'} if defined $self->{'source_dbxref_list'};
2050              
2051 0           my $query = "select dbxref_id from dbxref where db_id = ?";
2052 0           my $sth = $self->dbh->prepare($query);
2053 0           $sth->execute($self->gff_source_db_id);
2054              
2055             #unpack it here to make it easier
2056 0           my @dbxref_list;
2057 0           while (my $row = $sth->fetchrow_arrayref) {
2058 0           push @dbxref_list, $$row[0];
2059             }
2060              
2061 0           $sth->finish;
2062 0           $self->{'source_dbxref_list'} = join (",",@dbxref_list);
2063 0           return $self->{'source_dbxref_list'};
2064             }
2065              
2066              
2067             =head2 search_notes
2068              
2069             Title : search_notes
2070             Usage : $db->search_notes($search_term,$max_results)
2071             Function: full-text search on features, ENSEMBL-style
2072             Returns : an array of [$name,$description,$score]
2073             Args : see below
2074             Status : public
2075              
2076             This routine performs a full-text search on feature attributes (which
2077             attributes depend on implementation) and returns a list of
2078             [$name,$description,$score], where $name is the feature ID (accession?),
2079             $description is a human-readable description such as a locus line, and
2080             $score is the match strength.
2081              
2082             =cut
2083              
2084             =head2 ** NOT YET ACTIVE: search_notes IS IN TESTING STAGE **
2085              
2086             sub search_notes {
2087             my $self = shift;
2088             my ($search_string,$limit) = @_;
2089             my $limit_str;
2090             if (defined $limit) {
2091             $limit_str = " LIMIT $limit ";
2092             } else {
2093             $limit_str = "";
2094             }
2095              
2096             # so here's the plan:
2097             # if there is only 1 word, do 1-3
2098             # 1. search for accessions like $string.'%'--if any are found, quit and return them
2099             # 2. search for feature.name like $string.'%'--if found, keep and continue
2100             # 3. search somewhere in analysis like $string.'%'--if found, keep and continue
2101             # if there is more than one word, don't search accessions
2102             # 4. search each word anded together like '%'.$string.'%' --if found, keep and continue
2103             # 5. search somewhere in analysis like '%'.$string.'%'
2104              
2105             # $self->dbh->trace(1);
2106              
2107             my @search_str = split /\s+/, $search_string;
2108             my $qsearch_term = $self->dbh->quote($search_str[0]);
2109             my $like_str = "( (dbx.accession ~* $qsearch_term OR \n"
2110             ." f.name ~* $qsearch_term) ";
2111             for (my $i=1;$i<(scalar @search_str);$i++) {
2112             $qsearch_term = $self->dbh->quote($search_str[$i]);
2113             $like_str .= "and \n";
2114             $like_str .= " (dbx.accession ~* $qsearch_term OR \n"
2115             ." f.name ~* $qsearch_term) ";
2116             }
2117             $like_str .= ")";
2118              
2119             my $sth = $self->dbh->prepare("
2120             select dbx.accession,f.name,0
2121             from feature f, dbxref dbx, feature_dbxref fd
2122             where
2123             f.feature_id = fd.feature_id and
2124             fd.dbxref_id = dbx.dbxref_id and
2125             $like_str
2126             $limit_str
2127             ");
2128             $sth->execute or throw ("couldn't execute keyword query");
2129              
2130             my @results;
2131             while (my ($acc, $name, $score) = $sth->fetchrow_array) {
2132             $score = sprintf("%.2f",$score);
2133             push @results, [$acc, $name, $score];
2134             }
2135             $sth->finish;
2136             return @results;
2137             }
2138              
2139             =cut
2140              
2141             =head2 attributes
2142              
2143             Title : attributes
2144             Usage : @attributes = $db->attributes($id,$name)
2145             Function: get the "attributes" on a particular feature
2146             Returns : an array of string
2147             Args : feature ID [, attribute name]
2148             Status : public
2149              
2150             This method is intended as a "work-alike" to Bio::DB::GFF's
2151             attributes method, which has the following returns:
2152              
2153             Called in list context, it returns a list. If called in a
2154             scalar context, it returns the first value of the attribute
2155             if an attribute name is provided, otherwise it returns a
2156             hash reference in which the keys are attribute names
2157             and the values are anonymous arrays containing the values.
2158              
2159             =cut
2160              
2161             sub attributes {
2162 0     0 1   my $self = shift;
2163 0           my ($id,$tag) = @_;
2164              
2165             #get feature_id
2166              
2167 0           my $query = "select feature_id from feature where uniquename = ?";
2168 0 0         $query .= " and organism_id = ".$self->organism_id if $self->organism_id;
2169              
2170 0           my $sth = $self->dbh->prepare($query);
2171 0 0         $sth->execute($id) or $self->throw("failed to get feature_id in attributes");
2172 0           my $hashref = $sth->fetchrow_hashref;
2173 0           my $feature_id = $$hashref{'feature_id'};
2174              
2175 0 0         if (defined $tag) {
2176 0           my $query = "SELECT attribute FROM gfffeatureatts(?) WHERE type = ?";
2177 0           $sth = $self->dbh->prepare($query);
2178 0           $sth->execute($feature_id,$tag);
2179             } else {
2180 0           my $query = "SELECT type,attribute FROM gfffeatureatts(?)";
2181 0           $sth = $self->dbh->prepare($query);
2182 0           $sth->execute($feature_id);
2183             }
2184              
2185 0           my $arrayref = $sth->fetchall_arrayref;
2186              
2187 0           my @array = @$arrayref;
2188 0 0 0       ($sth->finish && return ()) if scalar @array == 0;
2189              
2190             ## dgg; ugly patch to copy polypeptide/protein residues into 'translation' attribute
2191             # need to add to gfffeatureatts ..
2192 0 0 0       if (!defined $tag || $tag eq 'translation') {
2193 0           $sth = $self->dbh->prepare("select type_id from feature where feature_id = ?");
2194 0           $sth->execute($feature_id); # or $self->throw("failed to get feature_id in attributes");
2195 0           $hashref = $sth->fetchrow_hashref;
2196 0           my $type_id = $$hashref{'type_id'};
2197             ## warn("DEBUG: dgg ugly prot. patch; type=$type_id for ftid=$feature_id\n");
2198            
2199 0 0 0       if( $self->name2term('polypeptide')
      0        
      0        
      0        
      0        
2200             && $type_id
2201             && $type_id == $self->name2term('polypeptide')
2202             || $self->name2term('protein')
2203             && $type_id
2204             && $type_id == $self->name2term('protein')
2205             ) {
2206 0           $sth = $self->dbh->prepare("select residues from feature where feature_id = ?");
2207 0           $sth->execute($feature_id); # or $self->throw("failed to get feature_id in attributes");
2208 0           $hashref = $sth->fetchrow_hashref;
2209 0           my $aa = $$hashref{'residues'};
2210 0 0         if($aa) {
2211             ## warn("DEBUG: dgg ugly prot. patch; aalen=",length($aa),"\n");
2212             ## this wasn't working till I added in a featureprop 'translation=dummy' .. why?
2213 0 0         if($tag) { push( @array, [ $aa]); }
  0            
2214 0           else { push( @array, ['translation', $aa]); }
2215             }
2216             }
2217             }
2218            
2219 0           my @result;
2220 0           foreach my $lineref (@array) {
2221 0           my @la = @$lineref;
2222 0           push @result, @la;
2223             }
2224              
2225 0           $sth->finish;
2226 0 0         return @result if wantarray;
2227              
2228 0 0         return $result[0] if $tag;
2229              
2230 0           my %result;
2231              
2232 0           foreach my $lineref (@array) {
2233 0           my ($key,$value) = splice(@$lineref,0,2);
2234 0           push @{$result{$key}},$value;
  0            
2235             }
2236 0           return \%result;
2237              
2238             }
2239              
2240              
2241              
2242             =head2 _segclass
2243              
2244             Title : _segclass
2245             Usage : $class = $db->_segclass
2246             Function: returns the perl class that we use for segment() calls
2247             Returns : a string containing the segment class
2248             Args : none
2249             Status : reserved for subclass use
2250              
2251             =cut
2252              
2253             #sub default_class {return 'Sequence' }
2254             ## URGI changes
2255             sub default_class {
2256              
2257 0     0 0   my $self = shift;
2258              
2259             #dgg
2260 0 0 0       unless( $self->{'reference_class'} || @_ ) {
2261 0           $self->{'reference_class'} = $self->chado_reference_class();
2262             }
2263            
2264 0 0         if(@_) {
2265 0           my $checkref = $self->check_chado_reference_class(@_);
2266 0 0         unless($checkref) {
2267 0           $self->throw("unable to find reference_class '$_[0]' feature in the database");
2268             }
2269             }
2270            
2271 0 0 0       $self->{'reference_class'} = shift || 'Sequence' if(@_);
2272              
2273 0           return $self->{'reference_class'};
2274              
2275             }
2276              
2277             sub check_chado_reference_class {
2278 0     0 0   my $self = shift;
2279 0 0         if(@_) {
2280 0           my $refclass= shift;
2281 0           my $type_id = $self->name2term($refclass);
2282 0           my $query = "select feature_id from feature where type_id = ?";
2283 0           my $sth = $self->dbh->prepare($query);
2284 0 0         $sth->execute($type_id) or $self->throw("trying to find chado_reference_class");
2285 0           my $data = $sth->fetchrow_hashref();
2286 0           my $refid= $$data{'feature_id'};
2287             ## warn("check_chado_reference_class: $refclass = $type_id -> $refid"); # DEBUG
2288              
2289 0           $sth->finish;
2290 0           return $refid;
2291             }
2292             }
2293              
2294             =head2 chado_reference_class
2295              
2296             Title : chado_reference_class
2297             Usage : $obj->chado_reference_class()
2298             Function: get or return the ID to use for Gbrowse map reference class
2299             using cvtermprop table, value = MAP_REFERENCE_TYPE
2300             Returns : the cvterm.name
2301             Args : to return the id, none; to determine the id, 1
2302             See also: default_class, refclass_feature_id
2303              
2304             Optionally test that user/config supplied ref class is indeed a proper
2305             chado feature type.
2306            
2307             =cut
2308              
2309              
2310             sub chado_reference_class {
2311 0     0 1   my $self = shift;
2312 0 0         return $self->{'chado_reference_class'} if($self->{'chado_reference_class'});
2313              
2314 0           my $chado_reference_class='Sequence'; # default ?
2315            
2316 0           my $query = "select cvterm_id from cvtermprop where value = ?";
2317 0           my $sth = $self->dbh->prepare($query);
2318 0 0         $sth->execute(MAP_REFERENCE_TYPE) or $self->throw("trying to find chado_reference_class");
2319 0           my $data = $sth->fetchrow_hashref(); #? FIXME: could be many values *?
2320 0           my $ref_cvtermid = $$data{'cvterm_id'};
2321            
2322 0           $sth->finish;
2323 0 0         if($ref_cvtermid) {
2324 0           $query = "select name from cvterm where cvterm_id = ?";
2325 0           $sth = $self->dbh->prepare($query);
2326 0 0         $sth->execute($ref_cvtermid) or $self->throw("trying to find chado_reference_class");
2327 0           $data = $sth->fetchrow_hashref();
2328 0 0         $chado_reference_class = $$data{'name'} if ($$data{'name'});
2329             # warn("chado_reference_class: $chado_reference_class = $ref_cvtermid"); # DEBUG
2330 0           $sth->finish;
2331             }
2332              
2333 0           return $self->{'chado_reference_class'} = $chado_reference_class;
2334             }
2335              
2336              
2337             =head2 refclass_feature_id
2338              
2339             Title : refclass_feature_id
2340             Usage : $self->refclass_srcfeature_id()
2341             Function: Used to store the feature_id of the reference class feature we are working on (e.g. contig, supercontig)
2342             With this feature we can filter out all the request to be sure we are extracting a feature located on
2343             the reference class feature.
2344             Returns : A scalar
2345             Args : The feature_id on setting
2346              
2347             =cut
2348              
2349             sub refclass_feature_id {
2350              
2351 0     0 1   my $self = shift;
2352              
2353 0 0         $self->{'refclass_feature_id'} = shift if(@_);
2354              
2355 0           return $self->{'refclass_feature_id'};
2356              
2357             }
2358              
2359              
2360 0     0     sub _segclass { return SEGCLASS }
2361              
2362 0     0 0   sub absolute {return}
2363              
2364             #implemented exactly the same as Bio::DB::SeqFeature::Store::DBI::mysql
2365             sub clone {
2366             #this is EO's implementation for the BDSFS::DBI::Pg implementation
2367             #he says Pg's clone method is flawed
2368 0     0 1   my $self = shift;
2369              
2370             # my $dsn = $self->{db_args}->{dsn};
2371             # my $user = $self->{db_args}->{username};
2372             # my $pass = $self->{db_args}->{password};
2373              
2374             # $self->dbh()->{InactiveDestroy} = 1;
2375             # my $new_dbh = DBI->connect($dsn,$user,$pass) or $self->throw($DBI::errstr);
2376             # $new_dbh->{InactiveDestroy} = 1;
2377             # $self->{dbh} = $new_dbh unless $self->is_temp;
2378              
2379              
2380             # this is the BDSFS::DBI::mysql implementation
2381 0           $self->{dbh}{InactiveDestroy} = 1;
2382 0           $self->{dbh} = $self->{dbh}->clone({})
2383             #magic from perlmonks to silence a warning:
2384             # http://www.perlmonks.org/?node_id=594175
2385             # without the empty {} you get warnings about unrecognised attribute name
2386             ; # unless $self->is_temp;
2387             }
2388              
2389              
2390             #this sub doesn't work and just causes annoying warnings
2391             #sub DESTROY {
2392             # my $self = shift;
2393             # $self->dbh->disconnect;
2394             # return;
2395             #}
2396              
2397             =head1 LEFTOVERS FROM BIO::DB::GFF NEEDED FOR DAS
2398              
2399             these methods should probably be declared in an interface class
2400             that Bio::DB::GFF implements. for instance, the aggregator methods
2401             could be described in Bio::SeqFeature::AggregatorI
2402              
2403             =cut
2404              
2405 0     0 0   sub aggregators { return(); }
2406              
2407             =head1 END LEFTOVERS
2408              
2409             =cut
2410              
2411              
2412             package Bio::DB::Das::ChadoIterator;
2413              
2414             sub new {
2415 0     0     my $package = shift;
2416 0           my $features = shift;
2417 0           return bless $features,$package;
2418             }
2419              
2420             sub next_seq {
2421 0     0     my $self = shift;
2422 0 0         return unless @$self;
2423 0           my $next_feature = shift @$self;
2424 0           return $next_feature;
2425             }
2426              
2427             1;
2428              
2429              
2430