File Coverage

Bio/SeqFeature/Tools/IDHandler.pm
Criterion Covered Total %
statement 19 62 30.6
branch 4 18 22.2
condition 5 15 33.3
subroutine 3 6 50.0
pod 4 4 100.0
total 35 105 33.3


line stmt bran cond sub pod time code
1             #
2             # bioperl module for Bio::SeqFeature::Tools::IDHandler
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Chris Mungall
7             #
8             # Copyright Chris Mungall
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::SeqFeature::Tools::IDHandler - maps $seq_feature-Eprimary_tag
17              
18             =head1 SYNOPSIS
19              
20             use Bio::SeqIO;
21             use Bio::SeqFeature::Tools::IDHandler;
22              
23              
24             =head1 DESCRIPTION
25              
26             Class to map $seq_feature-Eprimary_tag
27              
28              
29             =head1 FEEDBACK
30              
31             =head2 Mailing Lists
32              
33             User feedback is an integral part of the evolution of this and other
34             Bioperl modules. Send your comments and suggestions preferably to the
35             Bioperl mailing lists Your participation is much appreciated.
36              
37             bioperl-l@bioperl.org - General discussion
38             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
39              
40             =head2 Support
41              
42             Please direct usage questions or support issues to the mailing list:
43              
44             I
45              
46             rather than to the module maintainer directly. Many experienced and
47             reponsive experts will be able look at the problem and quickly
48             address it. Please include a thorough description of the problem
49             with code and data examples if at all possible.
50              
51             =head2 Reporting Bugs
52              
53             report bugs to the Bioperl bug tracking system to help us keep track
54             the bugs and their resolution. Bug reports can be submitted via the
55             web:
56              
57             https://github.com/bioperl/bioperl-live/issues
58              
59             =head1 AUTHOR - Chris Mungall
60              
61             Email: cjm@fruitfly.org
62              
63             =head1 APPENDIX
64              
65             The rest of the documentation details each of the object
66             methods. Internal methods are usually preceded with a _
67              
68             =cut
69              
70              
71             # Let the code begin...
72              
73             package Bio::SeqFeature::Tools::IDHandler;
74 2     2   7 use strict;
  2         3  
  2         58  
75              
76             # Object preamble - inherits from Bio::Root::Root
77              
78 2     2   8 use base qw(Bio::Root::Root);
  2         2  
  2         946  
79              
80             =head2 new
81              
82             Title : new
83             Usage : $unflattener = Bio::SeqFeature::Tools::IDHandler->new();
84             Function: constructor
85             Example :
86             Returns : a new Bio::SeqFeature::Tools::IDHandler
87             Args : see below
88              
89              
90             =cut
91              
92             sub new {
93 0     0 1 0 my($class,@args) = @_;
94 0         0 my $self = $class->SUPER::new(@args);
95              
96 0         0 my($generate_id_sub) =
97             $self->_rearrange([qw(GENERATE_ID_SUB
98             )],
99             @args);
100              
101 0         0 return $self; # success - we hope!
102             }
103              
104             =head2 set_ParentIDs_from_hierarchy()
105              
106             Title : set_ParentIDs_from_hierarchy()
107             Usage : $idhandler->set_ParentIDs_from_hierarchy($fholder)
108             Function: populates tags Parent and ID via holder hierarchy
109             Example :
110             Returns :
111             Args : Bio::featureHolderI (either a SeqFeature or a Seq)
112              
113             This is mainly for GFF3 export
114              
115             GFF3 uses the tags ID and Parent to represent the feature containment
116             hierarchy; it does NOT use the feature holder tree
117              
118             This method sets Parent (and ID for any parents not set) based on
119             feature holder/containement hierarchy, ready for GFF3 output
120              
121             =cut
122              
123             # method author: cjm@fruitfly.org
124             sub set_ParentIDs_from_hierarchy(){
125 0     0 1 0 my $self = shift;
126 0         0 my ($featholder) = @_;
127              
128             # we will traverse the tree of contained seqfeatures
129             # (a seqfeature is itself a holder)
130              
131             # start with the top-level features
132 0         0 my @sfs = $featholder->get_SeqFeatures;
133              
134             # clear existing parent tags
135             # (we assume this is the desired behaviour)
136 0         0 my @all_sfs = $featholder->get_all_SeqFeatures;
137 0         0 foreach (@all_sfs) {
138 0 0       0 if ($_->has_tag('Parent')) {
139 0         0 $_->remove_tag('Parent');
140             }
141             }
142            
143              
144             # iterate until entire tree traversed
145 0         0 while (@sfs) {
146 0         0 my $sf = shift @sfs;
147 0         0 my @subsfs = $sf->get_SeqFeatures;
148              
149             # see if the ID tag
150 0         0 my $id = $sf->primary_id;
151 0 0       0 if (!$id) {
152             # the skolem function feature(seq,start,end,type)
153             # is presumed to uniquely identify this feature, and
154             # to also be persistent
155 0         0 $id = $sf->generate_unique_persistent_id;
156             }
157 0         0 foreach my $subsf (@subsfs) {
158 0         0 $subsf->add_tag_value('Parent', $id);
159             }
160            
161             # push children on to end of stack (breadth first search)
162 0         0 push(@sfs, @subsfs);
163             }
164 0         0 return;
165             }
166              
167             =head2 create_hierarchy_from_ParentIDs
168              
169             Title : create_hierarchy_from_ParentIDs
170             Usage : $idhandler->set_ParentIDs_from_hierarchy($fholder)
171             Function: inverse of set_ParentIDs_from_hierarchy
172             Example :
173             Returns : list of top SeqFeatures
174             Args :
175              
176              
177             =cut
178              
179             sub create_hierarchy_from_ParentIDs{
180 0     0 1 0 my ($self,$featholder,@args) = @_;
181              
182 0         0 my @sfs = $featholder->get_all_SeqFeatures;
183 0         0 my %sf_by_ID = ();
184 0         0 foreach (@sfs) {
185 0         0 my $id = $_->primary_id;
186 0 0       0 next unless $id;
187 0 0       0 if ($sf_by_ID{$id}) {
188 0         0 $featholder->throw("DUPLICATE ID: $id");
189             }
190 0         0 $sf_by_ID{$id} = $_;
191 0         0 $_->remove_SeqFeatures; # clear existing hierarchy (assume this is desired)
192             }
193 0 0       0 if (!%sf_by_ID) {
194             # warn??
195             # this is actually expected behaviour for some kinds of data;
196             # eg lists of STSs - no containment hierarchy
197 0         0 return;
198             }
199              
200             my @topsfs =
201             grep {
202 0         0 my @parents = $_->get_tagset_values('Parent');
  0         0  
203 0         0 foreach my $parent (@parents) {
204             $sf_by_ID{$parent}->add_SeqFeature($_)
205 0 0       0 if exists $sf_by_ID{$parent};
206             }
207 0         0 !@parents;
208             } @sfs;
209 0         0 $featholder->remove_SeqFeatures;
210 0         0 $featholder->add_SeqFeature($_) foreach @topsfs;
211 0         0 return @topsfs;
212             }
213              
214              
215             =head2 generate_unique_persistent_id
216              
217             Title : generate_unique_persistent_id
218             Usage :
219             Function: generates a unique and persistent identifier for this
220             Example :
221             Returns : value of primary_id (a scalar)
222             Args :
223              
224             Will generate an ID, B set primary_id() (see above)
225              
226             The ID is a string generated from
227              
228             seq_id
229             primary_tag
230             start
231             end
232              
233             There are three underlying assumptions: that all the above accessors
234             are set; that seq_id is a persistent and unique identifier for the
235             sequence containing this feature; and that
236              
237             (seq_id, primary_tag, start, end)
238              
239             is a "unique constraint" over features
240              
241             The ID is persistent, so long as none of these values change - if they
242             do, it is considered a separate entity
243              
244             =cut
245              
246             # method author: cjm@fruitfly.org
247             sub generate_unique_persistent_id{
248 58     58 1 59 my ($self,$sf,@args) = @_;
249              
250 58         44 my $id;
251 58 50       122 if (!$sf->isa("Bio::SeqFeatureI")) {
252 0         0 $sf->throw("not a Bio::SeqFeatureI");
253             }
254 58   33     86 my $seq_id = $sf->seq_id || $sf->throw("seq_id must be set: ".$sf->display_name);
255             #my $seq_id = $sf->seq_id || 'unknown_seq';
256 58 50       85 if ($sf->has_tag('transcript_id')) {
    100          
257 0         0 ($id) = $sf->get_tag_values('transcript_id');
258             }
259             elsif ($sf->has_tag('protein_id')) {
260 14         17 ($id) = $sf->get_tag_values('protein_id');
261             }
262             else {
263 44   33     65 my $source = $sf->source_tag || $sf->throw("source tag must be set: ".$sf->display_name);
264             #my $source = $sf->source_tag || 'unknown_source';
265 44   33     64 my $start = $sf->start || $sf->throw("start must be set or is zero: ".$sf->display_name);
266 44   33     69 my $end = $sf->end || $sf->throw("end must be set");
267 44   33     75 my $type = $sf->primary_tag || $sf->throw("primary_tag/type must be set: ".$sf->display_name);
268              
269 44         100 $id = "$source:$type:$seq_id:$start:$end";
270             }
271 58         108 $sf->primary_id($id);
272 58         105 return $id;
273             }
274              
275             1;