File Coverage

Bio/AlignIO/Handler/GenericAlignHandler.pm
Criterion Covered Total %
statement 185 211 87.6
branch 67 120 55.8
condition 19 38 50.0
subroutine 29 32 90.6
pod 11 11 100.0
total 311 412 75.4


line stmt bran cond sub pod time code
1             # Let the code begin...
2              
3             package Bio::AlignIO::Handler::GenericAlignHandler;
4              
5 3     3   13 use strict;
  3         3  
  3         73  
6 3     3   10 use warnings;
  3         3  
  3         73  
7              
8 3     3   277 use Bio::Annotation::Collection;
  3         3  
  3         63  
9 3     3   517 use Bio::Annotation::Comment;
  3         5  
  3         56  
10 3     3   13 use Bio::Annotation::SimpleValue;
  3         4  
  3         115  
11 3     3   883 use Bio::Annotation::Target;
  3         5  
  3         69  
12 3     3   12 use Bio::Annotation::DBLink;
  3         3  
  3         43  
13 3     3   598 use Bio::Annotation::Reference;
  3         5  
  3         60  
14 3     3   549 use Bio::SimpleAlign;
  3         5  
  3         62  
15 3     3   13 use Data::Dumper;
  3         3  
  3         187  
16              
17 3     3   13 use base qw(Bio::Root::Root Bio::HandlerBaseI);
  3         4  
  3         1001  
18              
19             # only stockholm is defined for now...
20             my %HANDLERS = (
21             # stockholm has sequence and alignment specific annotation; this
22             'stockholm' => {
23             'CONSENSUS_META' => \&_generic_consensus_meta,
24             'SEQUENCE' => \&_generic_metaseq,
25             'NAMED_META' => \&_generic_metaseq,
26             'ACCESSION' => \&_generic_store,
27             'ALPHABET' => \&_generic_store,
28             'ID' => \&_generic_store,
29             'DESCRIPTION' => \&_generic_store,
30             'REFERENCE' => \&_generic_reference,
31             'DBLINK' => \&_stockholm_target,
32             'DATABASE_COMMENT' => \&_generic_comment,
33             'ALIGNMENT_COMMENT' => \&_generic_comment,
34             '_DEFAULT_' => \&_generic_simplevalue
35             },
36             );
37              
38             sub new {
39 7     7 1 15 my ($class, @args) = @_;
40 7         29 my $self = $class->SUPER::new(@args);
41 7         28 my ($format, $verbose) = $self->_rearrange([qw(FORMAT VERBOSE)], @args);
42 7 50       20 $self->throw("Must define alignment record format") if !$format;
43 7 50       18 $verbose && $self->verbose($verbose);
44 7         21 $self->format($format);
45 7         18 $self->handler_methods();
46             # if we intend at a later point we can add a Builder
47             #$builder && $self->alignbuilder($builder);
48 7         23 return $self;
49             }
50              
51             sub handler_methods {
52 7     7 1 8 my $self = shift;
53 7 50       18 if (!($self->{'handlers'})) {
54             $self->throw("No handlers defined for alignment format ",$self->format)
55 7 50       16 unless exists $HANDLERS{$self->format};
56 7         12 $self->{'handlers'} = $HANDLERS{$self->format};
57             }
58 7         10 return ($self->{'handlers'});
59             }
60              
61             sub data_handler {
62 562     562 1 501 my ($self, $data) = @_;
63 562   33     889 my $nm = $data->{NAME} || $self->throw("No name tag defined!");
64             # this should handle data on the fly w/o caching; any caching should be
65             # done in the driver!
66             my $method = (exists $self->{'handlers'}->{$nm}) ? ($self->{'handlers'}->{$nm}) :
67 562 50       982 (exists $self->{'handlers'}->{'_DEFAULT_'}) ? ($self->{'handlers'}->{'_DEFAULT_'}) :
    100          
68             undef;
69 562 50       736 if (!$method) {
70 0         0 $self->debug("No handler defined for $nm\n");
71 0         0 return;
72             };
73 562         761 $self->$method($data);
74             }
75              
76             sub reset_parameters {
77 9     9 1 10 my $self = shift;
78 9         14 $self->{'_params'} = undef;
79 9         36 $self->{'_nse_cache'} = undef;
80 9         13 $self->{'_features'} = undef;
81             }
82              
83             sub format {
84 30     30 1 33 my $self = shift;
85 30 100       48 if (@_) {
86 7         14 my $format = lc shift;
87 7 50       17 $self->throw("Format $format not supported") unless exists $HANDLERS{$format};
88 7         13 $self->{'_alignformat'} = $format;
89             };
90 30         96 return $self->{'_alignformat'};
91             }
92              
93             sub get_params {
94 192     192 1 207 my ($self, @ids) = @_;
95 192         121 my $data;
96 192 100       264 if (scalar(@ids)) {
97 183         188 for my $id (@ids) {
98 201 50       367 if (!index($id, '-')==0) {
99 0         0 $id = '-'.$id ;
100             }
101 201 100       555 $data->{$id} = $self->{'_params'}->{$id} if (exists $self->{'_params'}->{$id});
102             }
103 183   100     292 $data ||= {};
104             } else {
105 9         13 $data = $self->{'_params'};
106             }
107 192         393 return $data;
108             }
109              
110             sub set_params {
111 0     0 1 0 shift->throw('Not implemented yet!');
112             }
113              
114             sub build_alignment {
115 9     9 1 13 my $self = shift;
116 9         10 my %init;
117 9         23 $self->process_seqs;
118 9         23 my $param = $self->get_params;
119 9 50       35 if (defined $param->{-seqs}) {
120 9         44 return Bio::SimpleAlign->new(%$param, -source => $self->format);
121             }
122 0         0 return;
123             }
124              
125             sub annotation_collection {
126 134     134 1 126 my ($self, $coll) = @_;
127 134 50       331 if ($coll) {
    100          
128 0 0 0     0 $self->throw("Must have Bio::AnnotationCollectionI ".
129             "when explicitly setting annotation_collection()")
130             unless (ref($coll) && $coll->isa('Bio::AnnotationCollectionI'));
131 0         0 $self->{'_params'}->{'-annotation'} = $coll;
132             } elsif (!exists($self->{'_params'}->{'-annotation'})) {
133 9         43 $self->{'_params'}->{'-annotation'} = Bio::Annotation::Collection->new()
134             }
135 134         331 return $self->{'_params'}->{'-annotation'};
136             }
137              
138             sub seq_annotation_collection {
139 0     0 1 0 my ($self, $coll) = @_;
140 0 0       0 if ($coll) {
    0          
141 0 0 0     0 $self->throw("Must have Bio::AnnotationCollectionI ".
142             "when explicitly setting seq_annotation_collection()")
143             unless (ref($coll) && $coll->isa('Bio::AnnotationCollectionI'));
144 0         0 $self->{'_params'}->{'-seq_annotation'} = $coll;
145             } elsif (!exists($self->{'_params'}->{'-seq_annotation'})) {
146 0         0 $self->{'_params'}->{'-seq_annotation'} = Bio::Annotation::Collection->new()
147             }
148 0         0 return $self->{'_params'}->{'-seq_annotation'};
149             }
150              
151             sub process_seqs {
152 9     9 1 13 my $self = shift;
153              
154 9         20 my $data = $self->get_params(qw(-seqs -seq_class -consensus_meta));
155 9   50     27 my $class = $data->{-seq_class} || 'Bio::LocatableSeq';
156             # cache classes loaded already
157 9 100       25 if (!exists($self->{'_loaded_modules'}->{$class})) {
158 5         29 $self->_load_module($class);
159 5         15 $self->{'_loaded_modules'}->{$class}++;
160             }
161             # process any meta sequence data
162 9 100 66     63 if ( $data->{-consensus_meta} && !UNIVERSAL::isa($data->{-consensus_meta},'Bio::Seq::Meta')) {
163 8         14 my $ref = $data->{-consensus_meta};
164 8 50       22 if (!exists($self->{'_loaded_modules'}->{'Bio::Seq::Meta'})) {
165 0         0 $self->_load_module('Bio::Seq::Meta');
166 0         0 $self->{'_loaded_modules'}->{'Bio::Seq::Meta'}++;
167             }
168 8         41 my $ms = Bio::Seq::Meta->new();
169 8         11 for my $tag (sort keys %{$ref}) {
  8         34  
170 10         23 $ms->named_meta($tag, $ref->{$tag});
171             }
172 8         14 $self->{'_params'}->{'-consensus_meta'} = $ms;
173             }
174             # this should always be an array ref!
175 9         10 for my $seq (@{$data->{-seqs}}) {
  9         25  
176 168 50       386 next if (UNIVERSAL::isa($seq,'Bio::LocatableI'));
177             # process anything else
178 168 50       415 $self->_from_nse($seq) if $seq->{NSE};
179 168 50       317 if (UNIVERSAL::isa($seq,'HASH')) {
180 168         127 my %param;
181 168         373 for my $p (keys %$seq) {
182 1503 50       2657 $param{'-'.lc $p} = $seq->{$p} if exists $seq->{$p};
183             }
184 168         550 my $ls = $class->new(%param);
185             # a little switcheroo to attach the sequence
186             # (though using it to get seq() doesn't work correctly yet!)
187 168 100 33     716 if (defined $seq->{NSE} &&
      100        
188             exists $self->{'_features'} &&
189             exists $self->{'_features'}->{ $seq->{NSE} }) {
190 1         2 for my $feat (@{ $self->{'_features'}->{ $seq->{NSE} } }) {
  1         3  
191 6         3 push @{ $self->{'_params'}->{'-features'} }, $feat;
  6         9  
192 6         10 $feat->attach_seq($ls);
193             }
194             }
195 168         615 $seq = $ls;
196             }
197             }
198             }
199              
200             ####################### SEQUENCE HANDLERS #######################
201              
202             # any sequence data for a Bio::Seq::Meta
203             sub _generic_metaseq {
204 319     319   225 my ($self, $data) = @_;
205 319 50       372 return unless $data;
206 319 50       395 $self->throw("No alignment position passed") if !exists($data->{BLOCK_LINE});
207 319 50       428 $self->throw("Alignment position must be an index greater than 0") if $data->{BLOCK_LINE} < 1;
208 319         279 $self->{'_params'}->{'-seq_class'} = 'Bio::Seq::Meta';
209 319         267 my $index = $data->{BLOCK_LINE} - 1;
210 319 100       481 if (my $nse = $self->{'_params'}->{'-seqs'}->[$index]->{NSE}) {
211 151 50       225 $self->throw("NSE in passed data doesn't match stored data in same position: $nse") unless $nse eq $data->{NSE};
212             } else {
213 168         255 $self->{'_params'}->{'-seqs'}->[$index]->{NSE} = $data->{NSE};
214             }
215 319 100       363 if ($data->{NAME} eq 'SEQUENCE') {
    50          
216 317         696 $self->{'_params'}->{'-seqs'}->[$index]->{SEQ} .= $data->{DATA};
217             } elsif ($data->{NAME} eq 'NAMED_META') {
218 2         8 $self->{'_params'}->{'-seqs'}->[$index]->{NAMED_META}->{$data->{META_TAG}} .= $data->{DATA};
219             }
220             }
221              
222             sub _generic_consensus_meta {
223 18     18   15 my ($self, $data) = @_;
224 18 50       29 return unless $data;
225 18 50       32 if ($data->{NAME} eq 'CONSENSUS_META') {
226 18         53 $self->{'_params'}->{'-consensus_meta'}->{$data->{META_TAG}} .= $data->{DATA};
227             }
228             }
229              
230             # any sequence data for a Bio::LocatableSeq
231             sub _generic_locatableseq {
232 0     0   0 my ($self, $data) = @_;
233 0 0       0 return unless $data;
234 0 0       0 $self->throw("No alignment position passed") if !exists($data->{BLOCK_LINE});
235 0 0       0 $self->throw("Alignment position must be an index greater than 0") if $data->{BLOCK_LINE} < 1;
236 0         0 my $index = $data->{BLOCK_LINE} - 1;
237 0 0       0 if (my $nse = $self->{'_params'}->{'-seqs'}->[$index]->{NSE}) {
238 0 0       0 $self->throw("NSE in passed data doesn't match stored data in same position: $nse") if $nse ne $data->{NSE};
239             } else {
240 0         0 $self->{'_params'}->{'-seqs'}->[$index]->{NSE} = $data->{NSE};
241             }
242 0 0       0 if ($data->{NAME} eq 'SEQUENCE') {
243 0         0 $self->{'_params'}->{'-seqs'}->[$index]->{SEQ} .= $data->{DATA};
244             }
245             }
246              
247             ####################### RAW DATA HANDLERS #######################
248              
249             # store by data name (ACCESSION, ID, etc), which can be mapped to the
250             # appropriate alignment or sequence parameter
251             sub _generic_store {
252 85     85   84 my ($self, $data) = @_;
253 85 50       109 return unless $data;
254 85 100       113 if ($data->{ALIGNMENT}) {
255 32         113 $self->{'_params'}->{'-'.lc $data->{NAME}} = $data->{DATA};
256             } else {
257             $self->{'_params'}->{'-seq_'.lc $data->{NAME}}->{$data->{NSE}} = $data->{DATA}
258 53         205 }
259             }
260              
261             sub _generic_reference {
262 22     22   22 my ($self, $data) = @_;
263             my $ref = Bio::Annotation::Reference->new(-title => $data->{TITLE},
264             -authors => $data->{AUTHORS},
265             -pubmed => $data->{PUBMED},
266             -location => $data->{JOURNAL},
267 22         146 -tagname => lc $data->{NAME});
268 22         58 $self->annotation_collection->add_Annotation($ref);
269             }
270              
271             sub _generic_simplevalue {
272 85     85   83 my ($self, $data) = @_;
273             my $sv = Bio::Annotation::SimpleValue->new(-value => $data->{DATA},
274 85         337 -tagname => lc $data->{NAME});
275 85         152 $self->annotation_collection->add_Annotation($sv);
276             }
277              
278             sub _generic_comment {
279 6     6   287 my ($self, $data) = @_;
280             my $comment = Bio::Annotation::Comment->new(-type => lc $data->{NAME},
281             -text => $data->{DATA},
282 6         48 -tagname => lc $data->{NAME});
283 6         15 $self->annotation_collection->add_Annotation($comment);
284             }
285              
286             # Some DBLinks in Stockholm format are unique, so a unique handler for them
287             sub _stockholm_target {
288 27     27   27 my ($self, $data) = @_;
289             # process database info
290 27         54 $self->_from_stk_dblink($data);
291 27         25 my $comment;
292             # Bio::Annotation::Target is now a DBLink, but has additional (RangeI)
293             # capabilities (for PDB data)
294             my $dblink = Bio::Annotation::Target->new(
295             -database => $data->{DBLINK_DB},
296             -primary_id => $data->{DBLINK_ACC},
297             -optional_id => $data->{DBLINK_OPT},
298             -start => $data->{DBLINK_START},
299             -end => $data->{DBLINK_END},
300             -strand => $data->{DBLINK_STRAND},
301 27         207 -comment => $comment,
302             -tagname => 'dblink',
303             );
304 27 100       79 if ($data->{ALIGNMENT}) {
305             # Alignment-specific DBLinks
306 21         49 $self->annotation_collection->add_Annotation($dblink);
307             } else {
308             # Sequence-specific DBLinks
309             # These should come with identifying information of some sort
310             # (ID/START/END/STRAND). Make into a SeqFeature (SimpleAlign is
311             # FeatureHolderI) spanning the length acc. to the NSE. Add the DBLink as
312             # Annotation specific to that SeqFeature, store in an internal hash by
313             # NSE so we can tie the LocatableSeq to the proper Features
314 6 50       15 $self->_from_nse($data) if $data->{NSE};
315             $self->throw("Must supply an sequence DISPLAY_ID or NSE for sequence-related
316 6 0 33     11 DBLinks") unless $data->{ACCESSION_NUMBER} || $data->{DISPLAY_ID};
317             my $sf = Bio::SeqFeature::Generic->new(-seq_id => $data->{DISPLAY_ID},
318             -accession_number => $data->{ACCESSION_NUMBER},
319             -start => $data->{START},
320             -end => $data->{END},
321             -strand => $data->{STRAND}
322 6         30 );
323 6         11 $sf->annotation->add_Annotation($dblink);
324             # index by NSE
325 6         5 push @{ $self->{'_features'}->{ $data->{NSE} } }, $sf;
  6         18  
326             #$self->seq_annotation_collection->add_Annotation($dblink);
327             }
328             }
329              
330             ####################### HELPER METHODS #######################
331              
332             # returns ACCESSION VERSION START END STRAND ALPHABET
333             # cached for multiple lookups, should reset in between uses
334             sub _from_nse {
335 174     174   146 my ($self, $data) = @_;
336 174 50       289 return unless my $nse = $data->{NSE};
337 174   100     266 $data->{ALPHABET} = $self->get_params('-alphabet')->{'-alphabet'} || 'protein';
338             # grab any accessions if present, switch out with ACCESSION from NSE
339             # (move that to primary_id)
340 174         178 my $new_acc;
341 174 100       256 if (exists $self->{'_params'}->{'-seq_accession'}) {
342 59         82 $new_acc = $self->{'_params'}->{'-seq_accession'}->{$data->{NSE}};
343             }
344 174 100       894 if ($nse =~ m{(\S+?)(?:\.(\d+))?/(\d+)-(\d+)}xmso) {
345 172 100 66     600 my $strand = $data->{ALPHABET} eq 'dna' || $data->{ALPHABET} eq 'rna' ? 1 : undef;
346 172         346 my ($start, $end) = ($3, $4);
347 172 100       363 if ($start > $end) {
348 44         77 ($start, $end, $strand) = ($end, $start, -1);
349             }
350 172   66     375 $data->{ACCESSION_NUMBER} = $new_acc || $1;
351 172         206 $data->{DISPLAY_ID} = $1;
352 172         225 $data->{VERSION} = $2;
353 172         157 $data->{START} = $start;
354 172         273 $data->{END} = $end;
355 172         237 $data->{STRAND} = $strand;
356             } else {
357             # we can parse for version here if needed
358 2         236 $data->{DISPLAY_ID} = $data->{NSE};
359             }
360             }
361              
362             # this will probably be split up into subhandlers based on Record/DB
363             sub _from_stk_dblink {
364 27     27   26 my ($self, $data) = @_;
365 27 50       57 return unless my $raw = $data->{DATA};
366 27         162 my @rawdata = split(m{\s*;\s*}, $raw);
367 27         29 my %dblink_data;
368 27 100       61 if ($rawdata[0] eq 'PDB') {
    100          
369             # fix for older Stockholm PDB range format
370 12 50 33     35 if (scalar(@rawdata) == 3 && $rawdata[2] =~ m{-}) {
371 0         0 @rawdata[2,3] = split('-',$rawdata[2],2);
372             }
373 12 50       24 $self->throw("Not standard PDB form: ".$data->{DATA}) if scalar(@rawdata) != 4;
374 12         40 my ($main, $chain) = split(m{\s+}, $rawdata[1]);
375 12   50     70 %dblink_data = (
376             DBLINK_DB => $rawdata[0],
377             DBLINK_ACC => $main,
378             DBLINK_OPT => $chain || '',
379             DBLINK_START => $rawdata[2],
380             DBLINK_END => $rawdata[3]
381             );
382             } elsif ($rawdata[0] eq 'SCOP') {
383 3 50       9 $self->throw("Not standard SCOP form: ".$data->{DATA}) if scalar(@rawdata) != 3;
384 3         15 %dblink_data = (
385             DBLINK_DB => $rawdata[0],
386             DBLINK_ACC => $rawdata[1],
387             DBLINK_OPT => $rawdata[2],
388             );
389             } else {
390 12 50       24 $self->warn("Some data missed: ".$data->{DATA}) if scalar(@rawdata) > 2;
391 12         33 %dblink_data = (
392             DBLINK_DB => $rawdata[0],
393             DBLINK_ACC => $rawdata[1],
394             );
395             }
396 27         78 while (my ($k, $v) = each %dblink_data) {
397 93 50       338 $data->{$k} = $v if $v;
398             }
399             }
400              
401             1;
402              
403             __END__