File Coverage

Bio/AlignIO/Handler/GenericAlignHandler.pm
Criterion Covered Total %
statement 185 211 87.6
branch 67 120 55.8
condition 20 38 52.6
subroutine 29 32 90.6
pod 11 11 100.0
total 312 412 75.7


line stmt bran cond sub pod time code
1             # Let the code begin...
2              
3             package Bio::AlignIO::Handler::GenericAlignHandler;
4              
5 3     3   17 use strict;
  3         5  
  3         74  
6 3     3   13 use warnings;
  3         5  
  3         85  
7              
8 3     3   248 use Bio::Annotation::Collection;
  3         5  
  3         63  
9 3     3   473 use Bio::Annotation::Comment;
  3         4  
  3         64  
10 3     3   14 use Bio::Annotation::SimpleValue;
  3         4  
  3         49  
11 3     3   753 use Bio::Annotation::Target;
  3         6  
  3         74  
12 3     3   14 use Bio::Annotation::DBLink;
  3         6  
  3         48  
13 3     3   531 use Bio::Annotation::Reference;
  3         5  
  3         70  
14 3     3   432 use Bio::SimpleAlign;
  3         6  
  3         66  
15 3     3   14 use Data::Dumper;
  3         4  
  3         190  
16              
17 3     3   20 use base qw(Bio::Root::Root Bio::HandlerBaseI);
  3         6  
  3         926  
18              
19             # only stockholm is defined for now...
20             my %HANDLERS = (
21             # stockholm has sequence and alignment specific annotation; this
22             'stockholm' => {
23             'CONSENSUS_META' => \&_generic_consensus_meta,
24             'SEQUENCE' => \&_generic_metaseq,
25             'NAMED_META' => \&_generic_metaseq,
26             'ACCESSION' => \&_generic_store,
27             'ALPHABET' => \&_generic_store,
28             'ID' => \&_generic_store,
29             'DESCRIPTION' => \&_generic_store,
30             'REFERENCE' => \&_generic_reference,
31             'DBLINK' => \&_stockholm_target,
32             'DATABASE_COMMENT' => \&_generic_comment,
33             'ALIGNMENT_COMMENT' => \&_generic_comment,
34             '_DEFAULT_' => \&_generic_simplevalue
35             },
36             );
37              
38             sub new {
39 7     7 1 24 my ($class, @args) = @_;
40 7         33 my $self = $class->SUPER::new(@args);
41 7         30 my ($format, $verbose) = $self->_rearrange([qw(FORMAT VERBOSE)], @args);
42 7 50       23 $self->throw("Must define alignment record format") if !$format;
43 7 50       26 $verbose && $self->verbose($verbose);
44 7         25 $self->format($format);
45 7         18 $self->handler_methods();
46             # if we intend at a later point we can add a Builder
47             #$builder && $self->alignbuilder($builder);
48 7         26 return $self;
49             }
50              
51             sub handler_methods {
52 7     7 1 13 my $self = shift;
53 7 50       20 if (!($self->{'handlers'})) {
54             $self->throw("No handlers defined for alignment format ",$self->format)
55 7 50       16 unless exists $HANDLERS{$self->format};
56 7         21 $self->{'handlers'} = $HANDLERS{$self->format};
57             }
58 7         13 return ($self->{'handlers'});
59             }
60              
61             sub data_handler {
62 562     562 1 761 my ($self, $data) = @_;
63 562   33     991 my $nm = $data->{NAME} || $self->throw("No name tag defined!");
64             # this should handle data on the fly w/o caching; any caching should be
65             # done in the driver!
66             my $method = (exists $self->{'handlers'}->{$nm}) ? ($self->{'handlers'}->{$nm}) :
67 562 50       1056 (exists $self->{'handlers'}->{'_DEFAULT_'}) ? ($self->{'handlers'}->{'_DEFAULT_'}) :
    100          
68             undef;
69 562 50       824 if (!$method) {
70 0         0 $self->debug("No handler defined for $nm\n");
71 0         0 return;
72             };
73 562         854 $self->$method($data);
74             }
75              
76             sub reset_parameters {
77 9     9 1 20 my $self = shift;
78 9         63 $self->{'_params'} = undef;
79 9         19 $self->{'_nse_cache'} = undef;
80 9         23 $self->{'_features'} = undef;
81             }
82              
83             sub format {
84 30     30 1 76 my $self = shift;
85 30 100       60 if (@_) {
86 7         17 my $format = lc shift;
87 7 50       22 $self->throw("Format $format not supported") unless exists $HANDLERS{$format};
88 7         15 $self->{'_alignformat'} = $format;
89             };
90 30         148 return $self->{'_alignformat'};
91             }
92              
93             sub get_params {
94 192     192 1 340 my ($self, @ids) = @_;
95 192         245 my $data;
96 192 100       334 if (scalar(@ids)) {
97 183         274 for my $id (@ids) {
98 201 50       453 if (!index($id, '-')==0) {
99 0         0 $id = '-'.$id ;
100             }
101 201 100       647 $data->{$id} = $self->{'_params'}->{$id} if (exists $self->{'_params'}->{$id});
102             }
103 183   100     332 $data ||= {};
104             } else {
105 9         18 $data = $self->{'_params'};
106             }
107 192         490 return $data;
108             }
109              
110             sub set_params {
111 0     0 1 0 shift->throw('Not implemented yet!');
112             }
113              
114             sub build_alignment {
115 9     9 1 168 my $self = shift;
116 9         17 my %init;
117 9         38 $self->process_seqs;
118 9         31 my $param = $self->get_params;
119 9 50       33 if (defined $param->{-seqs}) {
120 9         60 return Bio::SimpleAlign->new(%$param, -source => $self->format);
121             }
122 0         0 return;
123             }
124              
125             sub annotation_collection {
126 134     134 1 182 my ($self, $coll) = @_;
127 134 50       325 if ($coll) {
    100          
128 0 0 0     0 $self->throw("Must have Bio::AnnotationCollectionI ".
129             "when explicitly setting annotation_collection()")
130             unless (ref($coll) && $coll->isa('Bio::AnnotationCollectionI'));
131 0         0 $self->{'_params'}->{'-annotation'} = $coll;
132             } elsif (!exists($self->{'_params'}->{'-annotation'})) {
133 9         55 $self->{'_params'}->{'-annotation'} = Bio::Annotation::Collection->new()
134             }
135 134         336 return $self->{'_params'}->{'-annotation'};
136             }
137              
138             sub seq_annotation_collection {
139 0     0 1 0 my ($self, $coll) = @_;
140 0 0       0 if ($coll) {
    0          
141 0 0 0     0 $self->throw("Must have Bio::AnnotationCollectionI ".
142             "when explicitly setting seq_annotation_collection()")
143             unless (ref($coll) && $coll->isa('Bio::AnnotationCollectionI'));
144 0         0 $self->{'_params'}->{'-seq_annotation'} = $coll;
145             } elsif (!exists($self->{'_params'}->{'-seq_annotation'})) {
146 0         0 $self->{'_params'}->{'-seq_annotation'} = Bio::Annotation::Collection->new()
147             }
148 0         0 return $self->{'_params'}->{'-seq_annotation'};
149             }
150              
151             sub process_seqs {
152 9     9 1 18 my $self = shift;
153              
154 9         30 my $data = $self->get_params(qw(-seqs -seq_class -consensus_meta));
155 9   50     29 my $class = $data->{-seq_class} || 'Bio::LocatableSeq';
156             # cache classes loaded already
157 9 100       36 if (!exists($self->{'_loaded_modules'}->{$class})) {
158 5         37 $self->_load_module($class);
159 5         20 $self->{'_loaded_modules'}->{$class}++;
160             }
161             # process any meta sequence data
162 9 100 66     62 if ( $data->{-consensus_meta} && !UNIVERSAL::isa($data->{-consensus_meta},'Bio::Seq::Meta')) {
163 8         16 my $ref = $data->{-consensus_meta};
164 8 50       19 if (!exists($self->{'_loaded_modules'}->{'Bio::Seq::Meta'})) {
165 0         0 $self->_load_module('Bio::Seq::Meta');
166 0         0 $self->{'_loaded_modules'}->{'Bio::Seq::Meta'}++;
167             }
168 8         154 my $ms = Bio::Seq::Meta->new();
169 8         15 for my $tag (sort keys %{$ref}) {
  8         42  
170 10         36 $ms->named_meta($tag, $ref->{$tag});
171             }
172 8         19 $self->{'_params'}->{'-consensus_meta'} = $ms;
173             }
174             # this should always be an array ref!
175 9         17 for my $seq (@{$data->{-seqs}}) {
  9         23  
176 168 50       476 next if (UNIVERSAL::isa($seq,'Bio::LocatableI'));
177             # process anything else
178 168 50       608 $self->_from_nse($seq) if $seq->{NSE};
179 168 50       424 if (UNIVERSAL::isa($seq,'HASH')) {
180 168         198 my %param;
181 168         500 for my $p (keys %$seq) {
182 1503 50       3465 $param{'-'.lc $p} = $seq->{$p} if exists $seq->{$p};
183             }
184 168         686 my $ls = $class->new(%param);
185             # a little switcheroo to attach the sequence
186             # (though using it to get seq() doesn't work correctly yet!)
187 168 100 66     883 if (defined $seq->{NSE} &&
      100        
188             exists $self->{'_features'} &&
189             exists $self->{'_features'}->{ $seq->{NSE} }) {
190 1         3 for my $feat (@{ $self->{'_features'}->{ $seq->{NSE} } }) {
  1         3  
191 6         7 push @{ $self->{'_params'}->{'-features'} }, $feat;
  6         12  
192 6         13 $feat->attach_seq($ls);
193             }
194             }
195 168         824 $seq = $ls;
196             }
197             }
198             }
199              
200             ####################### SEQUENCE HANDLERS #######################
201              
202             # any sequence data for a Bio::Seq::Meta
203             sub _generic_metaseq {
204 319     319   377 my ($self, $data) = @_;
205 319 50       445 return unless $data;
206 319 50       478 $self->throw("No alignment position passed") if !exists($data->{BLOCK_LINE});
207 319 50       476 $self->throw("Alignment position must be an index greater than 0") if $data->{BLOCK_LINE} < 1;
208 319         388 $self->{'_params'}->{'-seq_class'} = 'Bio::Seq::Meta';
209 319         365 my $index = $data->{BLOCK_LINE} - 1;
210 319 100       634 if (my $nse = $self->{'_params'}->{'-seqs'}->[$index]->{NSE}) {
211 151 50       242 $self->throw("NSE in passed data doesn't match stored data in same position: $nse") unless $nse eq $data->{NSE};
212             } else {
213 168         331 $self->{'_params'}->{'-seqs'}->[$index]->{NSE} = $data->{NSE};
214             }
215 319 100       550 if ($data->{NAME} eq 'SEQUENCE') {
    50          
216 317         857 $self->{'_params'}->{'-seqs'}->[$index]->{SEQ} .= $data->{DATA};
217             } elsif ($data->{NAME} eq 'NAMED_META') {
218 2         8 $self->{'_params'}->{'-seqs'}->[$index]->{NAMED_META}->{$data->{META_TAG}} .= $data->{DATA};
219             }
220             }
221              
222             sub _generic_consensus_meta {
223 18     18   31 my ($self, $data) = @_;
224 18 50       34 return unless $data;
225 18 50       47 if ($data->{NAME} eq 'CONSENSUS_META') {
226 18         78 $self->{'_params'}->{'-consensus_meta'}->{$data->{META_TAG}} .= $data->{DATA};
227             }
228             }
229              
230             # any sequence data for a Bio::LocatableSeq
231             sub _generic_locatableseq {
232 0     0   0 my ($self, $data) = @_;
233 0 0       0 return unless $data;
234 0 0       0 $self->throw("No alignment position passed") if !exists($data->{BLOCK_LINE});
235 0 0       0 $self->throw("Alignment position must be an index greater than 0") if $data->{BLOCK_LINE} < 1;
236 0         0 my $index = $data->{BLOCK_LINE} - 1;
237 0 0       0 if (my $nse = $self->{'_params'}->{'-seqs'}->[$index]->{NSE}) {
238 0 0       0 $self->throw("NSE in passed data doesn't match stored data in same position: $nse") if $nse ne $data->{NSE};
239             } else {
240 0         0 $self->{'_params'}->{'-seqs'}->[$index]->{NSE} = $data->{NSE};
241             }
242 0 0       0 if ($data->{NAME} eq 'SEQUENCE') {
243 0         0 $self->{'_params'}->{'-seqs'}->[$index]->{SEQ} .= $data->{DATA};
244             }
245             }
246              
247             ####################### RAW DATA HANDLERS #######################
248              
249             # store by data name (ACCESSION, ID, etc), which can be mapped to the
250             # appropriate alignment or sequence parameter
251             sub _generic_store {
252 85     85   123 my ($self, $data) = @_;
253 85 50       133 return unless $data;
254 85 100       134 if ($data->{ALIGNMENT}) {
255 32         138 $self->{'_params'}->{'-'.lc $data->{NAME}} = $data->{DATA};
256             } else {
257             $self->{'_params'}->{'-seq_'.lc $data->{NAME}}->{$data->{NSE}} = $data->{DATA}
258 53         209 }
259             }
260              
261             sub _generic_reference {
262 22     22   34 my ($self, $data) = @_;
263             my $ref = Bio::Annotation::Reference->new(-title => $data->{TITLE},
264             -authors => $data->{AUTHORS},
265             -pubmed => $data->{PUBMED},
266             -location => $data->{JOURNAL},
267 22         133 -tagname => lc $data->{NAME});
268 22         58 $self->annotation_collection->add_Annotation($ref);
269             }
270              
271             sub _generic_simplevalue {
272 85     85   121 my ($self, $data) = @_;
273             my $sv = Bio::Annotation::SimpleValue->new(-value => $data->{DATA},
274 85         371 -tagname => lc $data->{NAME});
275 85         197 $self->annotation_collection->add_Annotation($sv);
276             }
277              
278             sub _generic_comment {
279 6     6   12 my ($self, $data) = @_;
280             my $comment = Bio::Annotation::Comment->new(-type => lc $data->{NAME},
281             -text => $data->{DATA},
282 6         55 -tagname => lc $data->{NAME});
283 6         18 $self->annotation_collection->add_Annotation($comment);
284             }
285              
286             # Some DBLinks in Stockholm format are unique, so a unique handler for them
287             sub _stockholm_target {
288 27     27   43 my ($self, $data) = @_;
289             # process database info
290 27         73 $self->_from_stk_dblink($data);
291 27         34 my $comment;
292             # Bio::Annotation::Target is now a DBLink, but has additional (RangeI)
293             # capabilities (for PDB data)
294             my $dblink = Bio::Annotation::Target->new(
295             -database => $data->{DBLINK_DB},
296             -primary_id => $data->{DBLINK_ACC},
297             -optional_id => $data->{DBLINK_OPT},
298             -start => $data->{DBLINK_START},
299             -end => $data->{DBLINK_END},
300             -strand => $data->{DBLINK_STRAND},
301 27         189 -comment => $comment,
302             -tagname => 'dblink',
303             );
304 27 100       106 if ($data->{ALIGNMENT}) {
305             # Alignment-specific DBLinks
306 21         52 $self->annotation_collection->add_Annotation($dblink);
307             } else {
308             # Sequence-specific DBLinks
309             # These should come with identifying information of some sort
310             # (ID/START/END/STRAND). Make into a SeqFeature (SimpleAlign is
311             # FeatureHolderI) spanning the length acc. to the NSE. Add the DBLink as
312             # Annotation specific to that SeqFeature, store in an internal hash by
313             # NSE so we can tie the LocatableSeq to the proper Features
314 6 50       28 $self->_from_nse($data) if $data->{NSE};
315             $self->throw("Must supply an sequence DISPLAY_ID or NSE for sequence-related
316 6 0 33     13 DBLinks") unless $data->{ACCESSION_NUMBER} || $data->{DISPLAY_ID};
317             my $sf = Bio::SeqFeature::Generic->new(-seq_id => $data->{DISPLAY_ID},
318             -accession_number => $data->{ACCESSION_NUMBER},
319             -start => $data->{START},
320             -end => $data->{END},
321             -strand => $data->{STRAND}
322 6         41 );
323 6         18 $sf->annotation->add_Annotation($dblink);
324             # index by NSE
325 6         12 push @{ $self->{'_features'}->{ $data->{NSE} } }, $sf;
  6         30  
326             #$self->seq_annotation_collection->add_Annotation($dblink);
327             }
328             }
329              
330             ####################### HELPER METHODS #######################
331              
332             # returns ACCESSION VERSION START END STRAND ALPHABET
333             # cached for multiple lookups, should reset in between uses
334             sub _from_nse {
335 174     174   257 my ($self, $data) = @_;
336 174 50       344 return unless my $nse = $data->{NSE};
337 174   100     344 $data->{ALPHABET} = $self->get_params('-alphabet')->{'-alphabet'} || 'protein';
338             # grab any accessions if present, switch out with ACCESSION from NSE
339             # (move that to primary_id)
340 174         298 my $new_acc;
341 174 100       318 if (exists $self->{'_params'}->{'-seq_accession'}) {
342 59         114 $new_acc = $self->{'_params'}->{'-seq_accession'}->{$data->{NSE}};
343             }
344 174 100       1068 if ($nse =~ m{(\S+?)(?:\.(\d+))?/(\d+)-(\d+)}xmso) {
345 172 100 66     651 my $strand = $data->{ALPHABET} eq 'dna' || $data->{ALPHABET} eq 'rna' ? 1 : undef;
346 172         598 my ($start, $end) = ($3, $4);
347 172 100       454 if ($start > $end) {
348 44         106 ($start, $end, $strand) = ($end, $start, -1);
349             }
350 172   66     519 $data->{ACCESSION_NUMBER} = $new_acc || $1;
351 172         286 $data->{DISPLAY_ID} = $1;
352 172         282 $data->{VERSION} = $2;
353 172         246 $data->{START} = $start;
354 172         418 $data->{END} = $end;
355 172         312 $data->{STRAND} = $strand;
356             } else {
357             # we can parse for version here if needed
358 2         7 $data->{DISPLAY_ID} = $data->{NSE};
359             }
360             }
361              
362             # this will probably be split up into subhandlers based on Record/DB
363             sub _from_stk_dblink {
364 27     27   50 my ($self, $data) = @_;
365 27 50       62 return unless my $raw = $data->{DATA};
366 27         146 my @rawdata = split(m{\s*;\s*}, $raw);
367 27         66 my %dblink_data;
368 27 100       76 if ($rawdata[0] eq 'PDB') {
    100          
369             # fix for older Stockholm PDB range format
370 12 50 33     37 if (scalar(@rawdata) == 3 && $rawdata[2] =~ m{-}) {
371 0         0 @rawdata[2,3] = split('-',$rawdata[2],2);
372             }
373 12 50       29 $self->throw("Not standard PDB form: ".$data->{DATA}) if scalar(@rawdata) != 4;
374 12         44 my ($main, $chain) = split(m{\s+}, $rawdata[1]);
375 12   50     68 %dblink_data = (
376             DBLINK_DB => $rawdata[0],
377             DBLINK_ACC => $main,
378             DBLINK_OPT => $chain || '',
379             DBLINK_START => $rawdata[2],
380             DBLINK_END => $rawdata[3]
381             );
382             } elsif ($rawdata[0] eq 'SCOP') {
383 3 50       10 $self->throw("Not standard SCOP form: ".$data->{DATA}) if scalar(@rawdata) != 3;
384 3         14 %dblink_data = (
385             DBLINK_DB => $rawdata[0],
386             DBLINK_ACC => $rawdata[1],
387             DBLINK_OPT => $rawdata[2],
388             );
389             } else {
390 12 50       29 $self->warn("Some data missed: ".$data->{DATA}) if scalar(@rawdata) > 2;
391 12         38 %dblink_data = (
392             DBLINK_DB => $rawdata[0],
393             DBLINK_ACC => $rawdata[1],
394             );
395             }
396 27         99 while (my ($k, $v) = each %dblink_data) {
397 93 50       314 $data->{$k} = $v if $v;
398             }
399             }
400              
401             1;
402              
403             __END__