File Coverage

blib/lib/Bio/DOOP/Util/Search.pm

Criterion	Covered	Total	%
statement	6	128	4.6
branch			n/a
condition			n/a
subroutine	2	17	11.7
pod	15	15	100.0
total	23	160	14.3

line	stmt	sub	pod	time	code
1					package Bio::DOOP::Util::Search;
2
3	1	1		6	use strict;
	1			1
	1			34
4	1	1		6	use warnings;
	1			1
	1			1641
5
6					=head1 NAME
7
8					Bio::DOOP::Util::Search - Useful methods for easy search
9
10					=head1 VERSION
11
12					Version 0.13
13
14					=cut
15
16					our $VERSION = '0.13';
17
18					=head1 SYNOPSIS
19
20					use Bio::DOOP::DOOP;
21
22					$db = Bio::DOOP::DBSQL->connect("user","passwd","database","localhost");
23					@motifs = @{Bio::DOOP::Util::Search::get_all_motifs_by_type($db,"V")};
24
25					=head1 DESCRIPTION
26
27					Collection of utilities handling large queries. Most of
28					the methods return arrayrefs of motifs, sequences or clusters.
29
30					=head1 AUTHORS
31
32					Tibor Nagy, Godollo, Hungary and Endre Sebestyen, Martonvasar, Hungary
33
34					=head1 METHODS
35
36					=head2 get_all_motifs_by_type
37
38					Returns the arrayref of motifs with the type specified in the arguments.
39
40					=cut
41
42					sub get_all_motifs_by_type {
43	0	0	1		my $db = shift;
44	0				my $type = shift;
45
46	0				my @motifs;
47	0				my $ret = $db->query("SELECT motif_feature_primary_id FROM motif_feature WHERE motif_type = \"$type\";");
48	0				for my $motif_id (@$ret){
49	0				push @motifs,Bio::DOOP::Motif->new($db,$$motif_id[0]);
50					}
51	0				return(\@motifs);
52					}
53
54					=head2 get_all_original_subset
55
56					Returns the arrayref of all original subsets.
57
58					=cut
59
60					sub get_all_original_subset {
61	0	0	1		my $db = shift;
62	0				my @subsets;
63	0				my $ret = $db->query("SELECT subset_primary_id FROM cluster_subset WHERE original = \"y\";");
64	0				for my $subset (@$ret){
65	0				push @subsets,Bio::DOOP::ClusterSubset->new($db,$$subset[0]);
66					}
67	0				return(\@subsets);
68					}
69
70					=head2 get_all_cluster_by_gene_id
71
72					Returns the arrayref of all Bio::DOOP::Cluster objects, defined by a gene id.
73
74					=cut
75
76					sub get_all_cluster_by_gene_id {
77	0	0	1		my $db = shift;
78	0				my $gene_id = shift;
79	0				my $promoter_size = shift;
80
81	0				my @clusters;
82	0				my $ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster,sequence,subset_xref,sequence_annotation WHERE sequence.sequence_annotation_primary_id = sequence_annotation.sequence_annotation_primary_id AND subset_xref.sequence_primary_id = sequence.sequence_primary_id AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND sequence_annotation.sequence_gene_name LIKE '$gene_id%';");
83
84	0				for my $cluster (@$ret){
85	0				push @clusters,Bio::DOOP::Cluster->new($db,$$cluster[0],$promoter_size);
86					}
87	0				return(\@clusters);
88					}
89
90					=head2 get_all_cluster_by_keyword
91
92					Returns the arrayref of all Bio::DOOP::Cluster objects containing the keyword in their sequence description. The tss annotation search is currently disabled.
93
94					=cut
95
96					sub get_all_cluster_by_keyword {
97	0	0	1		my $db = shift;
98	0				my $keyword = shift;
99	0				my $promoter_size = shift;
100
101	0				my @clusters;
102					my @cluster_db_id;
103	0				my %seen;
104
105					# Query from sequence_annot.
106	0				my $ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster, sequence_annotation, sequence, subset_xref WHERE subset_xref.sequence_primary_id = sequence.sequence_primary_id AND sequence.sequence_annotation_primary_id = sequence_annotation.sequence_annotation_primary_id AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND sequence_annotation.sequence_desc LIKE '%$keyword%';");
107	0				for my $cluster (@$ret){
108	0				push @cluster_db_id,$$cluster[0];
109					}
110
111					# Query from tss_annot.
112					# NO
113					#$ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster, tss_annotation, sequence_feature, subset_xref WHERE subset_xref.sequence_primary_id = sequence_feature.sequence_primary_id AND sequence_feature.tss_primary_id = tss_annotation.tss_primary_id AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND tss_annotation.tss_desc LIKE '%$keyword%';");
114					#for my $cluster (@$ret){
115					# push @cluster_db_id,$$cluster[0];
116					#}
117
118					#Remove the redundant cluster_db_ids.
119	0				my @cluster_id_uniq = grep { ! $seen{ $_ }++ } @cluster_db_id;
	0
120
121	0				for my $cluster (@cluster_id_uniq){
122	0				push @clusters,Bio::DOOP::Cluster->new($db,$cluster,$promoter_size);
123					}
124
125	0				return(\@clusters);
126					}
127
128					=head2 get_all_cluster_by_xref
129
130					Returns the arrayref of Bio::DOOP::Cluster objects, containing a given xref.
131
132					=cut
133
134					sub get_all_cluster_by_xref {
135	0	0	1		my $db = shift;
136	0				my $type = shift;
137	0				my $value = shift;
138	0				my $promoter_size = shift;
139
140	0				my @clusters;
141
142	0				my $ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster, sequence_xref, subset_xref WHERE sequence_xref.sequence_primary_id = subset_xref.sequence_primary_id AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND sequence_xref.xref_type = '$type' AND sequence_xref.xref_id = '$value';");
143
144	0				for my $cluster (@$ret){
145	0				push @clusters,Bio::DOOP::Cluster->new($db,$$cluster[0],$promoter_size);
146					}
147
148	0				return(\@clusters);
149					}
150
151					=head2 get_all_cluster_by_taxon_name
152
153					Returns the arrayref of Bio::DOOP::Cluster objects containing a taxon name.
154					Don't use this, use get_all_cluster_by_taxon_id with NCBI IDs!
155
156					=cut
157
158					sub get_all_cluster_by_taxon_name {
159	0	0	1		my $db = shift;
160	0				my $taxon = shift;
161	0				my $promoter_size = shift;
162
163	0				my @clusters;
164
165	0				my $ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster, taxon_annotation, sequence, subset_xref WHERE subset_xref.sequence_primary_id = sequence.sequence_primary_id AND sequence.taxon_primary_id = taxon_annotation.taxon_primary_id AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND taxon_annotation.taxon_name = '$taxon';");
166
167	0				for my $cluster (@$ret){
168	0				push @clusters,Bio::DOOP::Cluster->new($db,$$cluster[0],$promoter_size);
169					}
170	0				return(\@clusters);
171					}
172
173					=head2 get_all_cluster_id_by_taxon_name
174
175					Returns the arrayref of cluster ids containing the taxon name.
176					Don't use this, use get_all_cluster_id_by_taxon_id with NCBI IDs!
177
178					=cut
179
180					sub get_all_cluster_id_by_taxon_name {
181	0	0	1		my $db = shift;
182	0				my $taxon = shift;
183	0				my $promoter_size = shift;
184
185	0				my @clusters;
186
187	0				my $ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster, taxon_annotation, sequence, subset_xref WHERE subset_xref.sequence_primary_id = sequence.sequence_primary_id AND sequence.taxon_primary_id = taxon_annotation.taxon_primary_id AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND taxon_annotation.taxon_name = '$taxon';");
188
189	0				for my $cluster (@$ret){
190	0				push @clusters,$$cluster[0];
191					}
192	0				return(\@clusters);
193					}
194
195					=head2 get_all_cluster_by_taxon_id
196
197					Returns the arrayref of Bio::DOOP::Cluster objects, containing an NCBI taxon id.
198
199					=cut
200
201					sub get_all_cluster_by_taxon_id {
202	0	0	1		my $db = shift;
203	0				my $taxon = shift;
204	0				my $promoter_size = shift;
205
206	0				my @clusters;
207
208	0				my $ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster, taxon_annotation, sequence, subset_xref WHERE subset_xref.sequence_primary_id = sequence.sequence_primary_id AND sequence.taxon_primary_id = taxon_annotation.taxon_primary_id AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND taxon_annotation.taxon_taxid = '$taxon';");
209
210	0				for my $cluster (@$ret){
211	0				push @clusters,Bio::DOOP::Cluster->new($db,$$cluster[0],$promoter_size);
212					}
213	0				return(\@clusters);
214					}
215
216					=head2 get_all_cluster_id_by_taxon_id
217
218					Returns the arrayref of cluster ids containing an NCBI taxon id.
219
220					=cut
221
222					sub get_all_cluster_id_by_taxon_id {
223	0	0	1		my $db = shift;
224	0				my $taxon = shift;
225	0				my $promoter_size = shift;
226
227	0				my @clusters;
228
229	0				my $ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster, taxon_annotation, sequence, subset_xref WHERE subset_xref.sequence_primary_id = sequence.sequence_primary_id AND sequence.taxon_primary_id = taxon_annotation.taxon_primary_id AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND taxon_annotation.taxon_taxid = '$taxon';");
230
231	0				for my $cluster (@$ret){
232	0				push @clusters,$$cluster[0];
233					}
234	0				return(\@clusters);
235					}
236
237					=head2 get_all_cluster_by_sequence_id
238
239					Returns the arrayref of Bio::DOOP::Cluster objects, containing a given sequence id (fake GI).
240
241					=cut
242
243					sub get_all_cluster_by_sequence_id {
244	0	0	1		my $db = shift;
245	0				my $sequence_id = shift;
246	0				my $promoter_size = shift;
247
248	0				my @clusters;
249
250	0				my $ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster, sequence, subset_xref WHERE subset_xref.sequence_primary_id = sequence.sequence_primary_id AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND sequence.sequence_fake_gi LIKE '$sequence_id%';");
251
252	0				for my $cluster (@$ret){
253	0				push @clusters,Bio::DOOP::Cluster->new($db,$$cluster[0],$promoter_size);
254					}
255	0				return(\@clusters);
256					}
257
258					=head2 get_all_cluster_by_atno
259
260					Returns the arrayref of Bio::DOOP::Cluster objects, containing a given At Number.
261
262					=cut
263
264					sub get_all_cluster_by_atno {
265	0	0	1		my $db = shift;
266	0				my $atno = shift;
267	0				my $promoter_size = shift;
268
269	0				my @clusters;
270
271	0				my $ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster, sequence_xref, subset_xref WHERE subset_xref.sequence_primary_id = sequence_xref.sequence_primary_id AND sequence_xref.xref_type = 'at_no' AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND sequence_xref.xref_id LIKE '$atno%';");
272
273	0				for my $cluster (@$ret) {
274	0				push @clusters,Bio::DOOP::Cluster->new($db,$$cluster[0],$promoter_size);
275					}
276	0				return(\@clusters);
277					}
278
279					=head2 get_all_seq_by_motifid
280
281					Returns the arrayref of Bio::DOOP::Sequence objects, containing a given motif id.
282
283					=cut
284
285					sub get_all_seq_by_motifid {
286	0	0	1		my $db = shift;
287	0				my $motifid = shift;
288	0				my @seqs;
289
290	0				my $ret = $db->query("SELECT sequence_primary_id FROM sequence_feature WHERE motif_feature_primary_id = $motifid;");
291
292	0				for my $seq (@$ret){
293	0				push @seqs,Bio::DOOP::Sequence->new($db,$$seq[0]);
294					}
295
296	0				return(\@seqs);
297					}
298
299					=head2 get_all_cluster_by_go_id
300
301					Returns the arrayref of Bio::DOOP::Cluster objects, containing a given GO ID.
302
303					=cut
304					sub get_all_cluster_by_go_id {
305	0	0	1		my $db = shift;
306	0				my $goid = shift;
307	0				my $promoter_size = shift;
308
309	0				my @clusters;
310
311	0				my $ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster, sequence_xref, subset_xref WHERE subset_xref.sequence_primary_id = sequence_xref.sequence_primary_id AND sequence_xref.xref_type = 'go_id' AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND sequence_xref.xref_id LIKE '$goid';");
312
313	0				for my $cluster (@$ret) {
314	0				push @clusters,Bio::DOOP::Cluster->new($db,$$cluster[0],$promoter_size);
315					}
316	0				return(\@clusters);
317					}
318
319					=head2 get_all_cluster_by_ensno
320
321					Returns the arrayref of Bio::DOOP::Cluster objects, containing a given ENSEMBL gene ID.
322
323					=cut
324
325					sub get_all_cluster_by_ensno {
326	0	0	1		my $db = shift;
327	0				my $ensno = shift;
328	0				my $promoter_size = shift;
329
330	0				my @clusters;
331
332	0				my $ret = $db->query("SELECT DISTINCT(cluster.cluster_id) FROM cluster, sequence_xref, subset_xref WHERE subset_xref.sequence_primary_id = sequence_xref.sequence_primary_id AND sequence_xref.xref_type = 'ensembl_id' AND cluster.cluster_primary_id = subset_xref.cluster_primary_id AND sequence_xref.xref_id LIKE '$ensno%';");
333
334	0				for my $cluster (@$ret) {
335	0				push @clusters,Bio::DOOP::Cluster->new($db,$$cluster[0],$promoter_size);
336					}
337	0				return(\@clusters);
338					}
339
340					=head2 get_all_cluster_id
341
342					Returns an arrayref of all the cluster IDs of a given promoter/subset category.
343					For example returns all clusters with 1000 bp E type subsets.
344
345					=cut
346
347					sub get_all_cluster_id {
348
349	0	0	1		my $db = shift;
350	0				my $promoter_size = shift;
351	0				my $subset_type = shift;
352
353	0				my @clusters;
354
355	0				my $ret = $db->query("SELECT cluster.cluster_id FROM cluster, cluster_subset WHERE cluster.cluster_promoter_type = '$promoter_size' AND cluster.cluster_primary_id = cluster_subset.cluster_primary_id AND cluster_subset.subset_type = '$subset_type';");
356
357	0				for my $cluster (@$ret) {
358	0				push @clusters, $$cluster[0];
359					}
360	0				return(\@clusters);
361					}
362					1;