File Coverage

Bio/AlignIO/Handler/GenericAlignHandler.pm
Criterion Covered Total %
statement 185 211 87.6
branch 67 120 55.8
condition 19 38 50.0
subroutine 29 32 90.6
pod 11 11 100.0
total 311 412 75.4


line stmt bran cond sub pod time code
1             # Let the code begin...
2              
3             package Bio::AlignIO::Handler::GenericAlignHandler;
4              
5 3     3   12 use strict;
  3         4  
  3         74  
6 3     3   9 use warnings;
  3         3  
  3         73  
7              
8 3     3   418 use Bio::Annotation::Collection;
  3         3  
  3         70  
9 3     3   604 use Bio::Annotation::Comment;
  3         4  
  3         63  
10 3     3   12 use Bio::Annotation::SimpleValue;
  3         4  
  3         46  
11 3     3   873 use Bio::Annotation::Target;
  3         5  
  3         67  
12 3     3   12 use Bio::Annotation::DBLink;
  3         3  
  3         41  
13 3     3   572 use Bio::Annotation::Reference;
  3         4  
  3         60  
14 3     3   503 use Bio::SimpleAlign;
  3         4  
  3         73  
15 3     3   11 use Data::Dumper;
  3         2  
  3         156  
16              
17 3     3   10 use base qw(Bio::Root::Root Bio::HandlerBaseI);
  3         3  
  3         988  
18              
19             # only stockholm is defined for now...
20             my %HANDLERS = (
21             # stockholm has sequence and alignment specific annotation; this
22             'stockholm' => {
23             'CONSENSUS_META' => \&_generic_consensus_meta,
24             'SEQUENCE' => \&_generic_metaseq,
25             'NAMED_META' => \&_generic_metaseq,
26             'ACCESSION' => \&_generic_store,
27             'ALPHABET' => \&_generic_store,
28             'ID' => \&_generic_store,
29             'DESCRIPTION' => \&_generic_store,
30             'REFERENCE' => \&_generic_reference,
31             'DBLINK' => \&_stockholm_target,
32             'DATABASE_COMMENT' => \&_generic_comment,
33             'ALIGNMENT_COMMENT' => \&_generic_comment,
34             '_DEFAULT_' => \&_generic_simplevalue
35             },
36             );
37              
38             sub new {
39 7     7 1 19 my ($class, @args) = @_;
40 7         34 my $self = $class->SUPER::new(@args);
41 7         34 my ($format, $verbose) = $self->_rearrange([qw(FORMAT VERBOSE)], @args);
42 7 50       21 $self->throw("Must define alignment record format") if !$format;
43 7 50       16 $verbose && $self->verbose($verbose);
44 7         18 $self->format($format);
45 7         16 $self->handler_methods();
46             # if we intend at a later point we can add a Builder
47             #$builder && $self->alignbuilder($builder);
48 7         22 return $self;
49             }
50              
51             sub handler_methods {
52 7     7 1 8 my $self = shift;
53 7 50       18 if (!($self->{'handlers'})) {
54             $self->throw("No handlers defined for alignment format ",$self->format)
55 7 50       14 unless exists $HANDLERS{$self->format};
56 7         13 $self->{'handlers'} = $HANDLERS{$self->format};
57             }
58 7         12 return ($self->{'handlers'});
59             }
60              
61             sub data_handler {
62 562     562 1 496 my ($self, $data) = @_;
63 562   33     955 my $nm = $data->{NAME} || $self->throw("No name tag defined!");
64             # this should handle data on the fly w/o caching; any caching should be
65             # done in the driver!
66             my $method = (exists $self->{'handlers'}->{$nm}) ? ($self->{'handlers'}->{$nm}) :
67 562 50       973 (exists $self->{'handlers'}->{'_DEFAULT_'}) ? ($self->{'handlers'}->{'_DEFAULT_'}) :
    100          
68             undef;
69 562 50       731 if (!$method) {
70 0         0 $self->debug("No handler defined for $nm\n");
71 0         0 return;
72             };
73 562         712 $self->$method($data);
74             }
75              
76             sub reset_parameters {
77 9     9 1 9 my $self = shift;
78 9         12 $self->{'_params'} = undef;
79 9         37 $self->{'_nse_cache'} = undef;
80 9         16 $self->{'_features'} = undef;
81             }
82              
83             sub format {
84 30     30 1 31 my $self = shift;
85 30 100       48 if (@_) {
86 7         12 my $format = lc shift;
87 7 50       18 $self->throw("Format $format not supported") unless exists $HANDLERS{$format};
88 7         15 $self->{'_alignformat'} = $format;
89             };
90 30         98 return $self->{'_alignformat'};
91             }
92              
93             sub get_params {
94 192     192 1 244 my ($self, @ids) = @_;
95 192         146 my $data;
96 192 100       280 if (scalar(@ids)) {
97 183         239 for my $id (@ids) {
98 201 50       402 if (!index($id, '-')==0) {
99 0         0 $id = '-'.$id ;
100             }
101 201 100       617 $data->{$id} = $self->{'_params'}->{$id} if (exists $self->{'_params'}->{$id});
102             }
103 183   100     310 $data ||= {};
104             } else {
105 9         14 $data = $self->{'_params'};
106             }
107 192         457 return $data;
108             }
109              
110             sub set_params {
111 0     0 1 0 shift->throw('Not implemented yet!');
112             }
113              
114             sub build_alignment {
115 9     9 1 12 my $self = shift;
116 9         10 my %init;
117 9         19 $self->process_seqs;
118 9         22 my $param = $self->get_params;
119 9 50       27 if (defined $param->{-seqs}) {
120 9         40 return Bio::SimpleAlign->new(%$param, -source => $self->format);
121             }
122 0         0 return;
123             }
124              
125             sub annotation_collection {
126 134     134 1 124 my ($self, $coll) = @_;
127 134 50       301 if ($coll) {
    100          
128 0 0 0     0 $self->throw("Must have Bio::AnnotationCollectionI ".
129             "when explicitly setting annotation_collection()")
130             unless (ref($coll) && $coll->isa('Bio::AnnotationCollectionI'));
131 0         0 $self->{'_params'}->{'-annotation'} = $coll;
132             } elsif (!exists($self->{'_params'}->{'-annotation'})) {
133 9         44 $self->{'_params'}->{'-annotation'} = Bio::Annotation::Collection->new()
134             }
135 134         312 return $self->{'_params'}->{'-annotation'};
136             }
137              
138             sub seq_annotation_collection {
139 0     0 1 0 my ($self, $coll) = @_;
140 0 0       0 if ($coll) {
    0          
141 0 0 0     0 $self->throw("Must have Bio::AnnotationCollectionI ".
142             "when explicitly setting seq_annotation_collection()")
143             unless (ref($coll) && $coll->isa('Bio::AnnotationCollectionI'));
144 0         0 $self->{'_params'}->{'-seq_annotation'} = $coll;
145             } elsif (!exists($self->{'_params'}->{'-seq_annotation'})) {
146 0         0 $self->{'_params'}->{'-seq_annotation'} = Bio::Annotation::Collection->new()
147             }
148 0         0 return $self->{'_params'}->{'-seq_annotation'};
149             }
150              
151             sub process_seqs {
152 9     9 1 13 my $self = shift;
153              
154 9         21 my $data = $self->get_params(qw(-seqs -seq_class -consensus_meta));
155 9   50     31 my $class = $data->{-seq_class} || 'Bio::LocatableSeq';
156             # cache classes loaded already
157 9 100       26 if (!exists($self->{'_loaded_modules'}->{$class})) {
158 5         25 $self->_load_module($class);
159 5         10 $self->{'_loaded_modules'}->{$class}++;
160             }
161             # process any meta sequence data
162 9 100 66     64 if ( $data->{-consensus_meta} && !UNIVERSAL::isa($data->{-consensus_meta},'Bio::Seq::Meta')) {
163 8         13 my $ref = $data->{-consensus_meta};
164 8 50       20 if (!exists($self->{'_loaded_modules'}->{'Bio::Seq::Meta'})) {
165 0         0 $self->_load_module('Bio::Seq::Meta');
166 0         0 $self->{'_loaded_modules'}->{'Bio::Seq::Meta'}++;
167             }
168 8         36 my $ms = Bio::Seq::Meta->new();
169 8         11 for my $tag (sort keys %{$ref}) {
  8         32  
170 10         27 $ms->named_meta($tag, $ref->{$tag});
171             }
172 8         16 $self->{'_params'}->{'-consensus_meta'} = $ms;
173             }
174             # this should always be an array ref!
175 9         10 for my $seq (@{$data->{-seqs}}) {
  9         23  
176 168 50       393 next if (UNIVERSAL::isa($seq,'Bio::LocatableI'));
177             # process anything else
178 168 50       466 $self->_from_nse($seq) if $seq->{NSE};
179 168 50       384 if (UNIVERSAL::isa($seq,'HASH')) {
180 168         147 my %param;
181 168         446 for my $p (keys %$seq) {
182 1503 50       3226 $param{'-'.lc $p} = $seq->{$p} if exists $seq->{$p};
183             }
184 168         652 my $ls = $class->new(%param);
185             # a little switcheroo to attach the sequence
186             # (though using it to get seq() doesn't work correctly yet!)
187 168 100 33     856 if (defined $seq->{NSE} &&
      100        
188             exists $self->{'_features'} &&
189             exists $self->{'_features'}->{ $seq->{NSE} }) {
190 1         2 for my $feat (@{ $self->{'_features'}->{ $seq->{NSE} } }) {
  1         2  
191 6         5 push @{ $self->{'_params'}->{'-features'} }, $feat;
  6         8  
192 6         34 $feat->attach_seq($ls);
193             }
194             }
195 168         747 $seq = $ls;
196             }
197             }
198             }
199              
200             ####################### SEQUENCE HANDLERS #######################
201              
202             # any sequence data for a Bio::Seq::Meta
203             sub _generic_metaseq {
204 319     319   279 my ($self, $data) = @_;
205 319 50       428 return unless $data;
206 319 50       436 $self->throw("No alignment position passed") if !exists($data->{BLOCK_LINE});
207 319 50       526 $self->throw("Alignment position must be an index greater than 0") if $data->{BLOCK_LINE} < 1;
208 319         366 $self->{'_params'}->{'-seq_class'} = 'Bio::Seq::Meta';
209 319         300 my $index = $data->{BLOCK_LINE} - 1;
210 319 100       621 if (my $nse = $self->{'_params'}->{'-seqs'}->[$index]->{NSE}) {
211 151 50       279 $self->throw("NSE in passed data doesn't match stored data in same position: $nse") unless $nse eq $data->{NSE};
212             } else {
213 168         350 $self->{'_params'}->{'-seqs'}->[$index]->{NSE} = $data->{NSE};
214             }
215 319 100       430 if ($data->{NAME} eq 'SEQUENCE') {
    50          
216 317         1224 $self->{'_params'}->{'-seqs'}->[$index]->{SEQ} .= $data->{DATA};
217             } elsif ($data->{NAME} eq 'NAMED_META') {
218 2         8 $self->{'_params'}->{'-seqs'}->[$index]->{NAMED_META}->{$data->{META_TAG}} .= $data->{DATA};
219             }
220             }
221              
222             sub _generic_consensus_meta {
223 18     18   21 my ($self, $data) = @_;
224 18 50       26 return unless $data;
225 18 50       34 if ($data->{NAME} eq 'CONSENSUS_META') {
226 18         68 $self->{'_params'}->{'-consensus_meta'}->{$data->{META_TAG}} .= $data->{DATA};
227             }
228             }
229              
230             # any sequence data for a Bio::LocatableSeq
231             sub _generic_locatableseq {
232 0     0   0 my ($self, $data) = @_;
233 0 0       0 return unless $data;
234 0 0       0 $self->throw("No alignment position passed") if !exists($data->{BLOCK_LINE});
235 0 0       0 $self->throw("Alignment position must be an index greater than 0") if $data->{BLOCK_LINE} < 1;
236 0         0 my $index = $data->{BLOCK_LINE} - 1;
237 0 0       0 if (my $nse = $self->{'_params'}->{'-seqs'}->[$index]->{NSE}) {
238 0 0       0 $self->throw("NSE in passed data doesn't match stored data in same position: $nse") if $nse ne $data->{NSE};
239             } else {
240 0         0 $self->{'_params'}->{'-seqs'}->[$index]->{NSE} = $data->{NSE};
241             }
242 0 0       0 if ($data->{NAME} eq 'SEQUENCE') {
243 0         0 $self->{'_params'}->{'-seqs'}->[$index]->{SEQ} .= $data->{DATA};
244             }
245             }
246              
247             ####################### RAW DATA HANDLERS #######################
248              
249             # store by data name (ACCESSION, ID, etc), which can be mapped to the
250             # appropriate alignment or sequence parameter
251             sub _generic_store {
252 85     85   62 my ($self, $data) = @_;
253 85 50       114 return unless $data;
254 85 100       105 if ($data->{ALIGNMENT}) {
255 32         120 $self->{'_params'}->{'-'.lc $data->{NAME}} = $data->{DATA};
256             } else {
257             $self->{'_params'}->{'-seq_'.lc $data->{NAME}}->{$data->{NSE}} = $data->{DATA}
258 53         172 }
259             }
260              
261             sub _generic_reference {
262 22     22   28 my ($self, $data) = @_;
263             my $ref = Bio::Annotation::Reference->new(-title => $data->{TITLE},
264             -authors => $data->{AUTHORS},
265             -pubmed => $data->{PUBMED},
266             -location => $data->{JOURNAL},
267 22         124 -tagname => lc $data->{NAME});
268 22         44 $self->annotation_collection->add_Annotation($ref);
269             }
270              
271             sub _generic_simplevalue {
272 85     85   66 my ($self, $data) = @_;
273             my $sv = Bio::Annotation::SimpleValue->new(-value => $data->{DATA},
274 85         327 -tagname => lc $data->{NAME});
275 85         142 $self->annotation_collection->add_Annotation($sv);
276             }
277              
278             sub _generic_comment {
279 6     6   8 my ($self, $data) = @_;
280             my $comment = Bio::Annotation::Comment->new(-type => lc $data->{NAME},
281             -text => $data->{DATA},
282 6         52 -tagname => lc $data->{NAME});
283 6         14 $self->annotation_collection->add_Annotation($comment);
284             }
285              
286             # Some DBLinks in Stockholm format are unique, so a unique handler for them
287             sub _stockholm_target {
288 27     27   24 my ($self, $data) = @_;
289             # process database info
290 27         38 $self->_from_stk_dblink($data);
291 27         21 my $comment;
292             # Bio::Annotation::Target is now a DBLink, but has additional (RangeI)
293             # capabilities (for PDB data)
294             my $dblink = Bio::Annotation::Target->new(
295             -database => $data->{DBLINK_DB},
296             -primary_id => $data->{DBLINK_ACC},
297             -optional_id => $data->{DBLINK_OPT},
298             -start => $data->{DBLINK_START},
299             -end => $data->{DBLINK_END},
300             -strand => $data->{DBLINK_STRAND},
301 27         179 -comment => $comment,
302             -tagname => 'dblink',
303             );
304 27 100       80 if ($data->{ALIGNMENT}) {
305             # Alignment-specific DBLinks
306 21         38 $self->annotation_collection->add_Annotation($dblink);
307             } else {
308             # Sequence-specific DBLinks
309             # These should come with identifying information of some sort
310             # (ID/START/END/STRAND). Make into a SeqFeature (SimpleAlign is
311             # FeatureHolderI) spanning the length acc. to the NSE. Add the DBLink as
312             # Annotation specific to that SeqFeature, store in an internal hash by
313             # NSE so we can tie the LocatableSeq to the proper Features
314 6 50       18 $self->_from_nse($data) if $data->{NSE};
315             $self->throw("Must supply an sequence DISPLAY_ID or NSE for sequence-related
316 6 0 33     10 DBLinks") unless $data->{ACCESSION_NUMBER} || $data->{DISPLAY_ID};
317             my $sf = Bio::SeqFeature::Generic->new(-seq_id => $data->{DISPLAY_ID},
318             -accession_number => $data->{ACCESSION_NUMBER},
319             -start => $data->{START},
320             -end => $data->{END},
321             -strand => $data->{STRAND}
322 6         36 );
323 6         14 $sf->annotation->add_Annotation($dblink);
324             # index by NSE
325 6         7 push @{ $self->{'_features'}->{ $data->{NSE} } }, $sf;
  6         18  
326             #$self->seq_annotation_collection->add_Annotation($dblink);
327             }
328             }
329              
330             ####################### HELPER METHODS #######################
331              
332             # returns ACCESSION VERSION START END STRAND ALPHABET
333             # cached for multiple lookups, should reset in between uses
334             sub _from_nse {
335 174     174   163 my ($self, $data) = @_;
336 174 50       326 return unless my $nse = $data->{NSE};
337 174   100     280 $data->{ALPHABET} = $self->get_params('-alphabet')->{'-alphabet'} || 'protein';
338             # grab any accessions if present, switch out with ACCESSION from NSE
339             # (move that to primary_id)
340 174         201 my $new_acc;
341 174 100       280 if (exists $self->{'_params'}->{'-seq_accession'}) {
342 59         78 $new_acc = $self->{'_params'}->{'-seq_accession'}->{$data->{NSE}};
343             }
344 174 100       1083 if ($nse =~ m{(\S+?)(?:\.(\d+))?/(\d+)-(\d+)}xmso) {
345 172 100 66     671 my $strand = $data->{ALPHABET} eq 'dna' || $data->{ALPHABET} eq 'rna' ? 1 : undef;
346 172         416 my ($start, $end) = ($3, $4);
347 172 100       351 if ($start > $end) {
348 44         102 ($start, $end, $strand) = ($end, $start, -1);
349             }
350 172   66     451 $data->{ACCESSION_NUMBER} = $new_acc || $1;
351 172         224 $data->{DISPLAY_ID} = $1;
352 172         215 $data->{VERSION} = $2;
353 172         178 $data->{START} = $start;
354 172         326 $data->{END} = $end;
355 172         279 $data->{STRAND} = $strand;
356             } else {
357             # we can parse for version here if needed
358 2         242 $data->{DISPLAY_ID} = $data->{NSE};
359             }
360             }
361              
362             # this will probably be split up into subhandlers based on Record/DB
363             sub _from_stk_dblink {
364 27     27   36 my ($self, $data) = @_;
365 27 50       57 return unless my $raw = $data->{DATA};
366 27         126 my @rawdata = split(m{\s*;\s*}, $raw);
367 27         25 my %dblink_data;
368 27 100       57 if ($rawdata[0] eq 'PDB') {
    100          
369             # fix for older Stockholm PDB range format
370 12 50 33     22 if (scalar(@rawdata) == 3 && $rawdata[2] =~ m{-}) {
371 0         0 @rawdata[2,3] = split('-',$rawdata[2],2);
372             }
373 12 50       22 $self->throw("Not standard PDB form: ".$data->{DATA}) if scalar(@rawdata) != 4;
374 12         25 my ($main, $chain) = split(m{\s+}, $rawdata[1]);
375 12   50     56 %dblink_data = (
376             DBLINK_DB => $rawdata[0],
377             DBLINK_ACC => $main,
378             DBLINK_OPT => $chain || '',
379             DBLINK_START => $rawdata[2],
380             DBLINK_END => $rawdata[3]
381             );
382             } elsif ($rawdata[0] eq 'SCOP') {
383 3 50       8 $self->throw("Not standard SCOP form: ".$data->{DATA}) if scalar(@rawdata) != 3;
384 3         12 %dblink_data = (
385             DBLINK_DB => $rawdata[0],
386             DBLINK_ACC => $rawdata[1],
387             DBLINK_OPT => $rawdata[2],
388             );
389             } else {
390 12 50       21 $self->warn("Some data missed: ".$data->{DATA}) if scalar(@rawdata) > 2;
391 12         36 %dblink_data = (
392             DBLINK_DB => $rawdata[0],
393             DBLINK_ACC => $rawdata[1],
394             );
395             }
396 27         74 while (my ($k, $v) = each %dblink_data) {
397 93 50       310 $data->{$k} = $v if $v;
398             }
399             }
400              
401             1;
402              
403             __END__