File Coverage

blib/lib/Bio/DOOP/Sequence.pm

Criterion	Covered	Total	%
statement	9	189	4.7
branch	0	14	0.0
condition			n/a
subroutine	3	30	10.0
pod	27	27	100.0
total	39	260	15.0

line	stmt	bran	sub	pod	time	code
1						package Bio::DOOP::Sequence;
2
3	1		1		6	use strict;
	1				2
	1				31
4	1		1		6	use warnings;
	1				2
	1				28
5	1		1		6	use Carp qw(cluck carp verbose);
	1				3
	1				1964
6
7						=head1 NAME
8
9						Bio::DOOP::Sequence - Sequence (promoter region) object
10
11						=head1 VERSION
12
13						Version 0.13
14
15						=cut
16
17						our $VERSION = '0.13';
18
19						=head1 SYNOPSIS
20
21						=head1 DESCRIPTION
22
23						This object represents a specific promoter sequence in the database.
24						You can access the annotation and the sequence through this object.
25
26						=head1 AUTHORS
27
28						Tibor Nagy, Godollo, Hungary and Endre Sebestyen, Martonvasar, Hungary
29
30						=head1 METHODS
31
32						=head2 new
33
34						Creates a new sequence object from the sequence primary id.
35
36						Return type: Bio::DOOP::Sequence object
37
38						$seq = Bio::DOOP::Sequence->new($db,"1234");
39
40						=cut
41
42						sub new {
43	0		0	1		my $self = {};
44	0					my $dummy = shift;
45	0					my $db = shift;
46	0					my $id = shift;
47	0					my $i;
48	0					my $ret = $db->query("SELECT * FROM sequence WHERE sequence_primary_id = \"$id\";");
49	0					my @fields = @{$$ret[0]};
	0
50
51	0					$self->{DB} = $db;
52	0					$self->{PRIMARY} = $fields[0];
53	0					$self->{FAKE} = $fields[1];
54	0					$self->{DB_ID} = $fields[2];
55	0					$self->{LENGTH} = $fields[3];
56	0					$self->{DATE} = $fields[4];
57	0					$self->{VERSION} = $fields[5];
58	0					$self->{ANNOT} = $fields[6];
59	0					$self->{ORIG} = $fields[7];
60	0					$self->{DATA} = $fields[8];
61	0					$self->{TAXON} = $fields[9];
62
63	0	0				if (defined($self->{ANNOT})){
64
65	0					$ret = $db->query("SELECT * FROM sequence_annotation WHERE sequence_annotation_primary_id = \"".$self->{ANNOT}."\";");
66	0					@fields = @{$$ret[0]};
	0
67
68	0					$self->{MAINDBID} = $fields[1];
69	0					$self->{UTR} = $fields[2];
70	0					$self->{DESC} = $fields[3];
71	0					$self->{GENENAME} = $fields[4];
72
73						}
74
75	0	0				if (defined($self->{DATA})) {
76	0					$ret = $db->query("SELECT * FROM sequence_data WHERE sequence_data_primary_id = \"".$self->{DATA}."\";");
77	0					@fields = @{$$ret[0]};
	0
78
79	0					$self->{FASTA} = $fields[2];
80	0					$self->{BLAST} = $fields[3];
81						}
82
83	0					$ret = $db->query("SELECT * FROM taxon_annotation WHERE taxon_primary_id = \"".$self->{TAXON}."\";");
84	0					@fields = @{$$ret[0]};
	0
85
86	0					$self->{TAXID} = $fields[1];
87	0					$self->{TAXNAME} = $fields[2];
88	0					$self->{TAXCLASS} = $fields[3];
89
90	0					my %xref;
91	0					$ret = $db->query("SELECT xref_id,xref_type FROM sequence_xref WHERE sequence_primary_id = \"$id\";");
92	0					for($i = 0; $i < $#$ret+1; $i++){
93	0					@fields = @{$$ret[$i]};
	0
94	0					push @{ $xref{$fields[1]} }, $fields[0];
	0
95						}
96	0					$self->{XREF} = \%xref;
97
98	0					bless $self;
99	0					return($self);
100						}
101
102						=head2 new_from_dbid
103
104						Creates a new sequence object from the full sequence id which contains the following:
105
106						17622344 - 81001020 _ 3712 _ 118 - 617 _ 3 _ +
107						\| \| \| \| \| \| \|
108						GI/fakeGI \| \| \| \| \| \|
109						\| \| \| \| \| \|
110						clusterID____\| \| \| \| \| \|
111						taxID___________________\| \| \| \| \|
112						start_________________________\| \| \| \|
113						end_________________________________\| \| \|
114						type_____________________________________\| \|
115						strand_______________________________________\|
116
117						Return type: Bio::DOOP::Sequence object
118
119						$seq = Bio::DOOP::Sequence->new_from_dbid($db,"17622344-81001020_3712_118-617_3_+");
120
121						=cut
122
123						sub new_from_dbid {
124	0		0	1		my $self = {};
125	0					my $dummy = shift;
126	0					my $db = shift;
127	0					my $id = shift;
128	0					my $i;
129	0					my $ret = $db->query("SELECT * FROM sequence WHERE sequence_id = \"$id\";");
130	0					my @fields = @{$$ret[0]};
	0
131
132	0					$self->{DB} = $db;
133	0					$self->{PRIMARY} = $fields[0];
134	0					$self->{FAKE} = $fields[1];
135	0					$self->{DB_ID} = $fields[2];
136	0					$self->{LENGTH} = $fields[3];
137	0					$self->{DATE} = $fields[4];
138	0					$self->{VERSION} = $fields[5];
139	0					$self->{ANNOT} = $fields[6];
140	0					$self->{ORIG} = $fields[7];
141	0					$self->{DATA} = $fields[8];
142	0					$self->{TAXON} = $fields[9];
143
144	0	0				if (defined($self->{ANNOT})){
145
146	0					$ret = $db->query("SELECT * FROM sequence_annotation WHERE sequence_annotation_primary_id = \"".$self->{ANNOT}."\";");
147	0					@fields = @{$$ret[0]};
	0
148
149	0					$self->{MAINDBID} = $fields[1];
150	0					$self->{UTR} = $fields[2];
151	0					$self->{DESC} = $fields[3];
152	0					$self->{GENENAME} = $fields[4];
153
154						}
155
156	0	0				if (defined($self->{DATA})) {
157	0					$ret = $db->query("SELECT * FROM sequence_data WHERE sequence_data_primary_id = \"".$self->{DATA}."\";");
158	0					@fields = @{$$ret[0]};
	0
159
160	0					$self->{FASTA} = $fields[2];
161	0					$self->{BLAST} = $fields[3];
162						}
163
164	0					$ret = $db->query("SELECT * FROM taxon_annotation WHERE taxon_primary_id = \"".$self->{TAXON}."\";");
165	0					@fields = @{$$ret[0]};
	0
166
167	0					$self->{TAXID} = $fields[1];
168	0					$self->{TAXNAME} = $fields[2];
169	0					$self->{TAXCLASS} = $fields[3];
170
171	0					my %xref;
172	0					$ret = $db->query("SELECT xref_id,xref_type FROM sequence_xref WHERE sequence_primary_id = \"$id\";");
173	0					for($i = 0; $i < $#$ret+1; $i++){
174	0					@fields = @{$$ret[$i]};
	0
175	0					push @{ $xref{$fields[1]} }, $fields[0];
	0
176						}
177	0					$self->{XREF} = \%xref;
178
179	0					bless $self;
180	0					return($self);
181						}
182
183						=head2 get_id
184
185						Returns the sequence primary id. This is the internal ID from the MySQL database.
186
187						Return type: string
188
189						my $id = $seq->get_id;
190
191						=cut
192
193						sub get_id {
194	0		0	1		my $self = shift;
195	0					return($self->{PRIMARY});
196						}
197
198						=head2 get_fake_id
199
200						Returns the sequence GI or a fake GI if no real GI is available.
201
202						Return type: string
203
204						my $id = $seq->get_fake_id;
205
206						=cut
207
208						sub get_fake_id {
209	0		0	1		my $self = shift;
210	0					return($self->{FAKE});
211						}
212
213						=head2 get_db_id
214
215						Returns the full sequence ID, described at the new_from_dbid method.
216
217						Return type: string
218
219						my $id = $seq->get_db_id;
220
221						=cut
222
223						sub get_db_id {
224	0		0	1		my $self = shift;
225	0					return($self->{DB_ID});
226						}
227
228						=head2 get_length
229
230						Returns the length of the sequence.
231
232						Return type: string
233
234						my $length = $seq->get_length;
235
236						=cut
237
238						sub get_length {
239	0		0	1		my $self = shift;
240	0					return($self->{LENGTH});
241						}
242
243						=head2 get_date
244
245						Returns the last modification date of the MySQL record.
246
247						Return type: string
248
249						my $date = $seq->get_date;
250
251						=cut
252
253						sub get_date {
254	0		0	1		my $self = shift;
255	0					return($self->{DATE});
256						}
257
258						=head2 get_ver
259
260						Returns the version of the sequence.
261
262						Return type: string
263
264						my $version = $seq->get_ver;
265
266						=cut
267
268						sub get_ver {
269	0		0	1		my $self = shift;
270	0					return($self->{VERSION});
271						}
272
273						=head2 get_annot_id
274
275						Returns the sequence annotation primary id. This is the internal ID from the MySQL database.
276
277						Return type: string
278
279						my $annotation_id = $seq->get_annot_id;
280
281						=cut
282
283						sub get_annot_id {
284	0		0	1		my $self = shift;
285	0					return($self->{ANNOT});
286						}
287
288						=head2 get_orig_id
289
290						This method is not yet implemented.
291
292						=cut
293
294						sub get_orig_id {
295	0		0	1		my $self = shift;
296	0					return($self->{ORIG});
297						}
298
299						=head2 get_data_id
300
301						Returns the sequence data primary id. This is the internal ID from the MySQL database.
302
303						Return type: string
304
305						my $data_id = $seq->get_data_id;
306
307						=cut
308
309						sub get_data_id {
310	0		0	1		my $self = shift;
311	0					return($self->{DATA});
312						}
313
314						=head2 get_taxon_id
315
316						Returns the taxon annotation primary id. This is the internal ID from the MySQL database.
317
318						Return type: string
319
320						my $taxon_id = $seq->get_taxon_id;
321
322						=cut
323
324						sub get_taxon_id {
325	0		0	1		my $self = shift;
326	0					return($self->{TAXON});
327						}
328
329						=head2 get_data_main_db_id
330
331						Returns the main database id stored in the sequence annotation record (not the annotation primary id; see get_annot_id for that).
332
333						Return type: string
334
335						my $annotation_id = $seq->get_data_main_db_id;
336
337						=cut
338
339						sub get_data_main_db_id {
340	0		0	1		my $self = shift;
341	0					return($self->{MAINDBID});
342						}
343
344						=head2 get_utr_length
345
346						Returns the length of the 5' UTR included in the sequence.
347
348						Return type: string
349
350						$utr_length = $seq->get_utr_length;
351
352						=cut
353
354						sub get_utr_length {
355	0		0	1		my $self = shift;
356	0					return($self->{UTR});
357						}
358
359						=head2 get_desc
360
361						Returns the description of the sequence.
362
363						Return type: string
364
365						print $seq->get_desc,"\n";
366
367						=cut
368
369						sub get_desc {
370	0		0	1		my $self = shift;
371	0					return($self->{DESC});
372						}
373
374						=head2 get_gene_name
375
376						Returns the gene name of the promoter. If the gene is unknown or not annotated, it is empty.
377
378						Return type: string
379
380						$gene_name = $seq->get_gene_name;
381
382						=cut
383
384						sub get_gene_name {
385	0		0	1		my $self = shift;
386	0					return($self->{GENENAME});
387						}
388
389						=head2 get_fasta
390
391						Returns the promoter sequence in FASTA format.
392
393						Return type: string
394
395						print $seq->get_fasta;
396
397						=cut
398
399						sub get_fasta {
400	0		0	1		my $self = shift;
401	0					my $seq = ">".$self->{DB_ID}."\n".$self->{FASTA}."\n";
402	0					return($seq);
403						}
404
405						=head2 get_raw_seq
406
407						Returns the raw sequence without any other identifier.
408
409						Return type: string
410
411						my $rawseq = $seq->get_raw_seq;
412
413						=cut
414
415						sub get_raw_seq {
416	0		0	1		my $self = shift;
417	0					my $seq = $self->{FASTA};
418	0					return($seq);
419						}
420
421						=head2 get_blast
422
423						This method is not yet implemented.
424
425						=cut
426
427						sub get_blast {
428	0		0	1		my $self = shift;
429	0					return($self->{BLAST});
430						}
431
432						=head2 get_taxid
433
434						Returns the NCBI taxon ID of the sequence.
435
436						Return type: string
437
438						$taxid = $seq->get_taxid;
439
440						=cut
441
442						sub get_taxid {
443	0		0	1		my $self = shift;
444	0					return($self->{TAXID});
445						}
446
447						=head2 get_taxon_name
448
449						Returns the scientific name of the sequence's taxon ID.
450
451						Return type: string
452
453						print $seq->get_taxon_name;
454
455						=cut
456
457						sub get_taxon_name {
458	0		0	1		my $self = shift;
459	0					return($self->{TAXNAME});
460						}
461
462						=head2 get_taxon_class
463
464						Returns the taxonomic class of the sequence's taxon ID. Used internally,
465						to create monophyletic sets of sequences in an orthologous cluster.
466
467						Return type: string
468
469						print $seq->get_taxon_class;
470
471						=cut
472
473						sub get_taxon_class {
474	0		0	1		my $self = shift;
475	0					return($self->{TAXCLASS});
476						}
477
478						=head2 print_all_xref
479
480						Prints all the xrefs to other databases.
481
482						Type of xref IDs :
483
484						go_id : Gene Ontology ID
485						ncbi_gene_id : NCBI gene ID
486						ncbi_cds_gi : NCBI CDS GI
487						ncbi_rna_gi : NCBI RNA GI
488						ncbi_cds_prot_id : NCBI CDS protein ID
489						ncbi_rna_tr_id : NCBI RNA transcript ID
490						at_no : At Number
491
492						TODO : sometimes it gives back duplicated data
493
494						$seq->print_all_xref;
495
496						=cut
497
498						sub print_all_xref {
499	0		0	1		my $self = shift;
500	0					for my $keys ( keys %{ $self->{XREF} }){
	0
501	0					print"$keys: ";
502	0					for (@{ ${ $self->{XREF} }{$keys} }){print "$_ "}
	0
	0
	0
503	0					print"\n";
504						}
505						}
506
507						=head2 get_all_xref_keys
508
509						Returns the arrayref of xref names.
510
511						Return type: arrayref, the array containing strings (xref names)
512
513						@keys = @{$seq->get_all_xref_keys};
514
515						=cut
516
517						sub get_all_xref_keys {
518	0		0	1		my $self = shift;
519
520	0					my @xrefkeys = keys %{ $self->{XREF} };
	0
521	0					return(\@xrefkeys);
522						}
523
524						=head2 get_xref_value
525
526						Returns the arrayref of a given xref's values, or -1 if no values are stored for that xref name.
527
528						Return type: arrayref, the array containing strings (xref values)
529
530						@values = @{$seq->get_xref_value("go_id")};
531
532						=cut
533
534						sub get_xref_value {
535	0		0	1		my $self = shift;
536	0					my $key = shift;
537
538	0	0				if (${ $self->{XREF} }{$key}){
	0
539	0					return(${ $self->{XREF} }{$key});
	0
540						}
541						else {
542	0					return(-1);
543						}
544						}
545
546						=head2 get_all_seq_features
547
548						Returns the arrayref of all sequence features or -1 in the case of an error.
549
550						Return type: arrayref, the array containing Bio::DOOP::SequenceFeature objects
551
552						@seqfeat = @{$seq->get_all_seq_features};
553
554						=cut
555
556						sub get_all_seq_features {
557	0		0	1		my $self = shift;
558
559	0					my @seqfeatures;
560
561						# The order of the sequence features is important to correctly draw the picture of the cluster.
562	0					my $query = "SELECT sequence_feature_primary_id FROM sequence_feature WHERE sequence_primary_id = \"".$self->{PRIMARY}."\" ORDER BY feature_start;";
563	0					my $ref = $self->{DB}->query($query);
564
565	0	0				if ($#$ref == -1){
566	0					return(-1);
567						}
568
569	0					for my $sfpid (@$ref){
570	0					my $sf = Bio::DOOP::SequenceFeature->new($self->{DB},$$sfpid[0]);
571	0					push @seqfeatures, $sf;
572						}
573
574	0					return(\@seqfeatures);
575						}
576
577						=head2 get_all_subsets
578
579						Returns all subsets which contain the sequence, or -1 if no subset contains it.
580
581						Return type: arrayref, the array containing Bio::DOOP::ClusterSubset objects
582
583						@subsets = @{$seq->get_all_subsets};
584
585						=cut
586
587						sub get_all_subsets {
588	0		0	1		my $self = shift;
589
590	0					my @subsets;
591
592	0					my $id = $self->{PRIMARY};
593	0					my $query = "SELECT subset_primary_id FROM subset_xref WHERE sequence_primary_id = \"$id\"";
594	0					my $ref = $self->{DB}->query($query);
595
596	0	0				if ($#$ref == -1){
597	0					return(-1);
598						}
599
600	0					for my $subset (@$ref){
601	0					push @subsets, Bio::DOOP::ClusterSubset->new($self->{DB},$$subset[0]);
602						}
603
604	0					return(\@subsets);
605						}
606
607						1;