File Coverage

blib/lib/Bio/DOOP/Util/Search.pm
Criterion Covered Total %
statement 6 128 4.6
branch n/a
condition n/a
subroutine 2 17 11.7
pod 15 15 100.0
total 23 160 14.3


line stmt bran cond sub pod time code
1             package Bio::DOOP::Util::Search;
2              
3 1     1   6 use strict;
  1         1  
  1         34  
4 1     1   6 use warnings;
  1         1  
  1         1641  
5              
6             =head1 NAME
7              
8             Bio::DOOP::Util::Search - Useful methods for easy search
9              
10             =head1 VERSION
11              
12             Version 0.13
13              
14             =cut
15              
16             our $VERSION = '0.13';
17              
18             =head1 SYNOPSIS
19              
20             use Bio::DOOP::DOOP;
21              
22             $db = Bio::DOOP::DBSQL->connect("user","passwd","database","localhost");
23             @motifs = @{Bio::DOOP::Util::Search::get_all_motifs_by_type($db,"V")};
24              
25             =head1 DESCRIPTION
26              
27             Collection of utilities handling large queries. Most of
28             the methods return arrayrefs of motifs, sequences or clusters.
29              
30             =head1 AUTHORS
31              
32             Tibor Nagy, Godollo, Hungary and Endre Sebestyen, Martonvasar, Hungary
33              
34             =head1 METHODS
35              
36             =head2 get_all_motifs_by_type
37              
38             Returns the arrayref of motifs with the type specified in the arguments.
39              
40             =cut
41              
42             sub get_all_motifs_by_type {
               # Build one Bio::DOOP::Motif object per motif_feature row whose
               # motif_type equals $type, and return them as an arrayref.
               #   $db   - object providing query(); also handed to Motif->new
               #   $type - motif type value compared against motif_type
               # The query() result is walked as an arrayref of row arrayrefs;
               # column 0 is the motif_feature_primary_id.
               # NOTE(review): $type is interpolated directly into the SQL text, so
               # a value containing a double quote alters the statement - confirm
               # callers only pass trusted input.
43 0     0 1   my $db = shift;
44 0           my $type = shift;
45              
46 0           my @motifs;
47 0           my $ret = $db->query("SELECT motif_feature_primary_id FROM motif_feature WHERE motif_type = \"$type\";");
48 0           for my $motif_id (@$ret){
49 0           push @motifs,Bio::DOOP::Motif->new($db,$$motif_id[0]);
50             }
51 0           return(\@motifs);
52             }
53              
54             =head2 get_all_original_subset
55              
56             Returns the arrayref of all original subsets.
57              
58             =cut
59              
60             sub get_all_original_subset {
               # Return an arrayref of Bio::DOOP::ClusterSubset objects, one for
               # every cluster_subset row flagged original = "y".
               #   $db - object providing query(); also handed to ClusterSubset->new
               # The SQL text is constant (no caller-supplied values).
61 0     0 1   my $db = shift;
62 0           my @subsets;
63 0           my $ret = $db->query("SELECT subset_primary_id FROM cluster_subset WHERE original = \"y\";");
64 0           for my $subset (@$ret){
               # Column 0 of each row is the subset_primary_id.
65 0           push @subsets,Bio::DOOP::ClusterSubset->new($db,$$subset[0]);
66             }
67 0           return(\@subsets);
68             }
69              
70             =head2 get_all_cluster_by_gene_id
71              
72             Returns the arrayref of all Bio::DOOP::Cluster objects, defined by a gene id.
73              
74             =cut
75              
76             sub get_all_cluster_by_gene_id {
               # Return an arrayref of Bio::DOOP::Cluster objects for the distinct
               # cluster ids whose sequence_gene_name starts with $gene_id
               # (the SQL uses LIKE '$gene_id%', i.e. a prefix match).
               #   $db            - object providing query(); passed to Cluster->new
               #   $gene_id       - gene name prefix
               #   $promoter_size - forwarded unchanged to Cluster->new
               # NOTE(review): $gene_id is interpolated directly into the SQL text;
               # quotes or LIKE wildcards in it change the query - confirm input
               # is trusted.
77 0     0 1   my $db = shift;
78 0           my $gene_id = shift;
79 0           my $promoter_size = shift;
80              
81 0           my @clusters;
82 0           my $ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster,sequence,subset_xref,sequence_annotation WHERE sequence.sequence_annotation_primary_id = sequence_annotation.sequence_annotation_primary_id AND subset_xref.sequence_primary_id = sequence.sequence_primary_id AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND sequence_annotation.sequence_gene_name LIKE '$gene_id%';");
83              
84 0           for my $cluster (@$ret){
85 0           push @clusters,Bio::DOOP::Cluster->new($db,$$cluster[0],$promoter_size);
86             }
87 0           return(\@clusters);
88             }
89              
90             =head2 get_all_cluster_by_keyword
91              
92             Returns the arrayref of all Bio::DOOP::Cluster objects, containing the keyword in their sequence description. (The tss annotation search is currently disabled in the code.)
93              
94             =cut
95              
96             sub get_all_cluster_by_keyword {
               # Return an arrayref of Bio::DOOP::Cluster objects for clusters whose
               # sequence_desc contains $keyword (SQL LIKE '%$keyword%').
               #   $db            - object providing query(); passed to Cluster->new
               #   $keyword       - substring searched for in sequence_desc
               #   $promoter_size - forwarded unchanged to Cluster->new
               # Only the sequence_annotation query is active; the tss_annotation
               # query below is commented out.
               # NOTE(review): $keyword is interpolated directly into the SQL text;
               # confirm input is trusted.
97 0     0 1   my $db = shift;
98 0           my $keyword = shift;
99 0           my $promoter_size = shift;
100              
101 0           my @clusters;
102             my @cluster_db_id;
103 0           my %seen;
104              
105             # Query from sequence_annot.
106 0           my $ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster, sequence_annotation, sequence, subset_xref WHERE subset_xref.sequence_primary_id = sequence.sequence_primary_id AND sequence.sequence_annotation_primary_id = sequence_annotation.sequence_annotation_primary_id AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND sequence_annotation.sequence_desc LIKE '%$keyword%';");
107 0           for my $cluster (@$ret){
108 0           push @cluster_db_id,$$cluster[0];
109             }
110              
111             # Query from tss_annot.
112             # NO
113             #$ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster, tss_annotation, sequence_feature, subset_xref WHERE subset_xref.sequence_primary_id = sequence_feature.sequence_primary_id AND sequence_feature.tss_primary_id = tss_annotation.tss_primary_id AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND tss_annotation.tss_desc LIKE '%$keyword%';");
114             #for my $cluster (@$ret){
115             # push @cluster_db_id,$$cluster[0];
116             #}
117              
118             #Remove the redundant cluster_db_ids.
                # The grep keeps the first occurrence of each id, preserving order.
                # With a single DISTINCT query feeding the list this is a no-op;
                # presumably retained for when the second query is re-enabled.
119 0           my @cluster_id_uniq = grep { ! $seen{ $_ }++ } @cluster_db_id;
  0            
120              
121 0           for my $cluster (@cluster_id_uniq){
122 0           push @clusters,Bio::DOOP::Cluster->new($db,$cluster,$promoter_size);
123             }
124            
125 0           return(\@clusters);
126             }
127              
128             =head2 get_all_cluster_by_xref
129              
130             Returns the arrayref of Bio::DOOP::Cluster objects, containing a given xref.
131              
132             =cut
133              
134             sub get_all_cluster_by_xref {
                # Return an arrayref of Bio::DOOP::Cluster objects for the distinct
                # cluster ids linked to a sequence_xref row with
                # xref_type = $type and xref_id = $value (exact matches).
                #   $db            - object providing query(); passed to Cluster->new
                #   $type          - xref type string
                #   $value         - xref id string
                #   $promoter_size - forwarded unchanged to Cluster->new
                # NOTE(review): $type and $value are interpolated directly into the
                # SQL text; confirm input is trusted.
135 0     0 1   my $db = shift;
136 0           my $type = shift;
137 0           my $value = shift;
138 0           my $promoter_size = shift;
139              
140 0           my @clusters;
141              
142 0           my $ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster, sequence_xref, subset_xref WHERE sequence_xref.sequence_primary_id = subset_xref.sequence_primary_id AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND sequence_xref.xref_type = '$type' AND sequence_xref.xref_id = '$value';");
143              
144 0           for my $cluster (@$ret){
145 0           push @clusters,Bio::DOOP::Cluster->new($db,$$cluster[0],$promoter_size);
146             }
147              
148 0           return(\@clusters);
149             }
150              
151             =head2 get_all_cluster_by_taxon_name
152              
153             Returns the arrayref of Bio::DOOP::Cluster objects containing a taxon name.
154             Don't use this, use get_all_cluster_by_taxon_id with NCBI IDs!
155              
156             =cut
157              
158             sub get_all_cluster_by_taxon_name {
                # Return an arrayref of Bio::DOOP::Cluster objects for the distinct
                # cluster ids having a sequence whose taxon_name equals $taxon.
                #   $db            - object providing query(); passed to Cluster->new
                #   $taxon         - taxon name, matched exactly
                #   $promoter_size - forwarded unchanged to Cluster->new
                # NOTE(review): $taxon is interpolated directly into the SQL text;
                # confirm input is trusted.
159 0     0 1   my $db = shift;
160 0           my $taxon = shift;
161 0           my $promoter_size = shift;
162              
163 0           my @clusters;
164              
165 0           my $ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster, taxon_annotation, sequence, subset_xref WHERE subset_xref.sequence_primary_id = sequence.sequence_primary_id AND sequence.taxon_primary_id = taxon_annotation.taxon_primary_id AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND taxon_annotation.taxon_name = '$taxon';");
166              
167 0           for my $cluster (@$ret){
168 0           push @clusters,Bio::DOOP::Cluster->new($db,$$cluster[0],$promoter_size);
169             }
170 0           return(\@clusters);
171             }
172              
173             =head2 get_all_cluster_id_by_taxon_name
174              
175             Returns the arrayref of cluster ids containing the taxon name.
176             Don't use this, use get_all_cluster_id_by_taxon_id with NCBI IDs!
177              
178             =cut
179              
180             sub get_all_cluster_id_by_taxon_name {
                # Return an arrayref of plain cluster_id values (no objects are
                # built) for clusters having a sequence whose taxon_name equals
                # $taxon.
                #   $db    - object providing query()
                #   $taxon - taxon name, matched exactly
                # A third argument is shifted into $promoter_size but never used
                # in this sub.
                # NOTE(review): $taxon is interpolated directly into the SQL text;
                # confirm input is trusted.
181 0     0 1   my $db = shift;
182 0           my $taxon = shift;
183 0           my $promoter_size = shift;
184              
185 0           my @clusters;
186              
187 0           my $ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster, taxon_annotation, sequence, subset_xref WHERE subset_xref.sequence_primary_id = sequence.sequence_primary_id AND sequence.taxon_primary_id = taxon_annotation.taxon_primary_id AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND taxon_annotation.taxon_name = '$taxon';");
188              
189 0           for my $cluster (@$ret){
                # Column 0 of each row is the cluster_id itself.
190 0           push @clusters,$$cluster[0];
191             }
192 0           return(\@clusters);
193             }
194              
195             =head2 get_all_cluster_by_taxon_id
196              
197             Returns the arrayref of Bio::DOOP::Cluster objects, containing an NCBI taxon id.
198              
199             =cut
200              
201             sub get_all_cluster_by_taxon_id {
                # Return an arrayref of Bio::DOOP::Cluster objects for the distinct
                # cluster ids having a sequence whose taxon_taxid equals $taxon.
                #   $db            - object providing query(); passed to Cluster->new
                #   $taxon         - taxon id, compared as a quoted SQL string
                #   $promoter_size - forwarded unchanged to Cluster->new
                # NOTE(review): $taxon is interpolated directly into the SQL text;
                # confirm input is trusted.
202 0     0 1   my $db = shift;
203 0           my $taxon = shift;
204 0           my $promoter_size = shift;
205              
206 0           my @clusters;
207              
208 0           my $ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster, taxon_annotation, sequence, subset_xref WHERE subset_xref.sequence_primary_id = sequence.sequence_primary_id AND sequence.taxon_primary_id = taxon_annotation.taxon_primary_id AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND taxon_annotation.taxon_taxid = '$taxon';");
209              
210 0           for my $cluster (@$ret){
211 0           push @clusters,Bio::DOOP::Cluster->new($db,$$cluster[0],$promoter_size);
212             }
213 0           return(\@clusters);
214             }
215              
216             =head2 get_all_cluster_id_by_taxon_id
217              
218             Returns the arrayref of cluster ids containing an NCBI taxon id.
219              
220             =cut
221              
222             sub get_all_cluster_id_by_taxon_id {
                # Return an arrayref of plain cluster_id values (no objects are
                # built) for clusters having a sequence whose taxon_taxid equals
                # $taxon.
                #   $db    - object providing query()
                #   $taxon - taxon id, compared as a quoted SQL string
                # A third argument is shifted into $promoter_size but never used
                # in this sub.
                # NOTE(review): $taxon is interpolated directly into the SQL text;
                # confirm input is trusted.
223 0     0 1   my $db = shift;
224 0           my $taxon = shift;
225 0           my $promoter_size = shift;
226              
227 0           my @clusters;
228              
229 0           my $ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster, taxon_annotation, sequence, subset_xref WHERE subset_xref.sequence_primary_id = sequence.sequence_primary_id AND sequence.taxon_primary_id = taxon_annotation.taxon_primary_id AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND taxon_annotation.taxon_taxid = '$taxon';");
230              
231 0           for my $cluster (@$ret){
                # Column 0 of each row is the cluster_id itself.
232 0           push @clusters,$$cluster[0];
233             }
234 0           return(\@clusters);
235             }
236              
237             =head2 get_all_cluster_by_sequence_id
238            
239             Returns the arrayref of Bio::DOOP::Cluster objects, containing a given sequence id (fake GI).
240              
241             =cut
242              
243             sub get_all_cluster_by_sequence_id {
                # Return an arrayref of Bio::DOOP::Cluster objects for the distinct
                # cluster ids having a sequence whose sequence_fake_gi starts with
                # $sequence_id (SQL LIKE '$sequence_id%', i.e. a prefix match).
                #   $db            - object providing query(); passed to Cluster->new
                #   $sequence_id   - fake GI prefix
                #   $promoter_size - forwarded unchanged to Cluster->new
                # NOTE(review): $sequence_id is interpolated directly into the SQL
                # text; confirm input is trusted.
244 0     0 1   my $db = shift;
245 0           my $sequence_id = shift;
246 0           my $promoter_size = shift;
247              
248 0           my @clusters;
249              
250 0           my $ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster, sequence, subset_xref WHERE subset_xref.sequence_primary_id = sequence.sequence_primary_id AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND sequence.sequence_fake_gi LIKE '$sequence_id%';");
251              
252 0           for my $cluster (@$ret){
253 0           push @clusters,Bio::DOOP::Cluster->new($db,$$cluster[0],$promoter_size);
254             }
255 0           return(\@clusters);
256             }
257              
258             =head2 get_all_cluster_by_atno
259              
260             Returns the arrayref of Bio::DOOP::Cluster objects, containing a given At Number.
261              
262             =cut
263              
264             sub get_all_cluster_by_atno {
                # Return an arrayref of Bio::DOOP::Cluster objects for the distinct
                # cluster ids linked to a sequence_xref row of xref_type 'at_no'
                # whose xref_id starts with $atno (SQL LIKE '$atno%').
                #   $db            - object providing query(); passed to Cluster->new
                #   $atno          - At number prefix
                #   $promoter_size - forwarded unchanged to Cluster->new
                # NOTE(review): $atno is interpolated directly into the SQL text;
                # confirm input is trusted.
265 0     0 1   my $db = shift;
266 0           my $atno = shift;
267 0           my $promoter_size = shift;
268              
269 0           my @clusters;
270              
271 0           my $ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster, sequence_xref, subset_xref WHERE subset_xref.sequence_primary_id = sequence_xref.sequence_primary_id AND sequence_xref.xref_type = 'at_no' AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND sequence_xref.xref_id LIKE '$atno%';");
272              
273 0           for my $cluster (@$ret) {
274 0           push @clusters,Bio::DOOP::Cluster->new($db,$$cluster[0],$promoter_size);
275             }
276 0           return(\@clusters);
277             }
278              
279             =head2 get_all_seq_by_motifid
280              
281             Returns the arrayref of Bio::DOOP::Sequence objects, containing a given motif id.
282              
283             =cut
284              
285             sub get_all_seq_by_motifid {
                # Return an arrayref of Bio::DOOP::Sequence objects, one per
                # sequence_feature row whose motif_feature_primary_id equals
                # $motifid. No DISTINCT is applied, so one object is built per
                # returned row.
                #   $db      - object providing query(); passed to Sequence->new
                #   $motifid - motif primary id
                # NOTE(review): $motifid is interpolated into the SQL text without
                # quoting, so it must be a bare numeric value; anything else
                # changes the statement - confirm callers validate it.
286 0     0 1   my $db = shift;
287 0           my $motifid = shift;
288 0           my @seqs;
289              
290 0           my $ret = $db->query("SELECT sequence_primary_id FROM sequence_feature WHERE motif_feature_primary_id = $motifid;");
291              
292 0           for my $seq (@$ret){
293 0           push @seqs,Bio::DOOP::Sequence->new($db,$$seq[0]);
294             }
295              
296 0           return(\@seqs);
297             }
298              
299             =head2 get_all_cluster_by_go_id
300              
301             Returns the arrayref of Bio::DOOP::Cluster objects, containing a given GO ID.
302              
303             =cut
304             sub get_all_cluster_by_go_id {
                # Return an arrayref of Bio::DOOP::Cluster objects for the distinct
                # cluster ids linked to a sequence_xref row of xref_type 'go_id'
                # whose xref_id matches $goid via SQL LIKE.
                #   $db            - object providing query(); passed to Cluster->new
                #   $goid          - GO id; used as the LIKE pattern as given
                #   $promoter_size - forwarded unchanged to Cluster->new
                # Unlike the at_no / ensembl_id lookups in this file, no trailing
                # '%' is appended here, so any wildcard must come from the caller.
                # NOTE(review): $goid is interpolated directly into the SQL text;
                # confirm input is trusted.
305 0     0 1   my $db = shift;
306 0           my $goid = shift;
307 0           my $promoter_size = shift;
308              
309 0           my @clusters;
310              
311 0           my $ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster, sequence_xref, subset_xref WHERE subset_xref.sequence_primary_id = sequence_xref.sequence_primary_id AND sequence_xref.xref_type = 'go_id' AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND sequence_xref.xref_id LIKE '$goid';");
312              
313 0           for my $cluster (@$ret) {
314 0           push @clusters,Bio::DOOP::Cluster->new($db,$$cluster[0],$promoter_size);
315             }
316 0           return(\@clusters);
317             }
318              
319             =head2 get_all_cluster_by_ensno
320              
321             Returns the arrayref of Bio::DOOP::Cluster objects, containing a given ENSEMBL gene ID.
322              
323             =cut
324              
325             sub get_all_cluster_by_ensno {
                # Return an arrayref of Bio::DOOP::Cluster objects for the distinct
                # cluster ids linked to a sequence_xref row of xref_type
                # 'ensembl_id' whose xref_id starts with $ensno (LIKE '$ensno%').
                #   $db            - object providing query(); passed to Cluster->new
                #   $ensno         - ENSEMBL gene id prefix
                #   $promoter_size - forwarded unchanged to Cluster->new
                # NOTE(review): $ensno is interpolated directly into the SQL text;
                # confirm input is trusted.
326 0     0 1   my $db = shift;
327 0           my $ensno = shift;
328 0           my $promoter_size = shift;
329              
330 0           my @clusters;
331              
332 0           my $ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster, sequence_xref, subset_xref WHERE subset_xref.sequence_primary_id = sequence_xref.sequence_primary_id AND sequence_xref.xref_type = 'ensembl_id' AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND sequence_xref.xref_id LIKE '$ensno%';");
333              
334 0           for my $cluster (@$ret) {
335 0           push @clusters,Bio::DOOP::Cluster->new($db,$$cluster[0],$promoter_size);
336             }
337 0           return(\@clusters);
338             }
339              
340             =head2 get_all_cluster_id
341              
342             Returns an arrayref of all the cluster IDs of a given promoter/subset category.
343             For example returns all clusters with 1000 bp E type subsets.
344              
345             =cut
346              
347             sub get_all_cluster_id {
                # Return an arrayref of plain cluster_id values (no objects are
                # built) for clusters with cluster_promoter_type = $promoter_size
                # that have a cluster_subset row with subset_type = $subset_type.
                #   $db            - object providing query()
                #   $promoter_size - compared against cluster.cluster_promoter_type
                #   $subset_type   - compared against cluster_subset.subset_type
                # The query has no DISTINCT, so an id is returned once per matching
                # cluster/subset row pair.
                # NOTE(review): both values are interpolated directly into the SQL
                # text; confirm input is trusted.
348              
349 0     0 1   my $db = shift;
350 0           my $promoter_size = shift;
351 0           my $subset_type = shift;
352              
353 0           my @clusters;
354              
355 0           my $ret = $db->query("SELECT cluster.cluster_id FROM cluster, cluster_subset WHERE cluster.cluster_promoter_type = '$promoter_size' AND cluster.cluster_primary_id = cluster_subset.cluster_primary_id AND cluster_subset.subset_type = '$subset_type';");
356              
357 0           for my $cluster (@$ret) {
358 0           push @clusters, $$cluster[0];
359             }
360 0           return(\@clusters);
361             }
362             1;