File Coverage

Bio/AlignIO/po.pm
Criterion Covered Total %
statement 132 154 85.7
branch 39 50 78.0
condition 6 17 35.2
subroutine 5 5 100.0
pod 2 2 100.0
total 184 228 80.7


line stmt bran cond sub pod time code
1             # $Id: po.pm
2             #
3             # BioPerl module for Bio::AlignIO::po
4              
5             # based on the Bio::AlignIO::fasta module
6             # by Peter Schattner (and others?)
7             #
8             # and the SimpleAlign.pm module of Ewan Birney
9             #
10             # You may distribute this module under the same terms as perl itself
11             #
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::AlignIO::po - po MSA Sequence input/output stream
17              
18             =head1 SYNOPSIS
19              
20             Do not use this module directly. Use it via the L class.
21              
22             =head1 DESCRIPTION
23              
24             This object can transform L objects to and from
25             'po' format flat file databases. 'po' format is the native format of
26             the POA alignment program (Lee C, Grasso C, Sharlow MF, 'Multiple
27             sequence alignment using partial order graphs', Bioinformatics (2002),
28             18(3):452-64).
29              
30             =head1 FEEDBACK
31              
32             =head2 Support
33              
34             Please direct usage questions or support issues to the mailing list:
35              
36             I
37              
38             rather than to the module maintainer directly. Many experienced and
39             reponsive experts will be able look at the problem and quickly
40             address it. Please include a thorough description of the problem
41             with code and data examples if at all possible.
42              
43             =head2 Reporting Bugs
44              
45             Report bugs to the Bioperl bug tracking system to help us keep track
46             the bugs and their resolution. Bug reports can be submitted via the
47             web:
48              
49             https://github.com/bioperl/bioperl-live/issues
50              
51             =head1 AUTHORS - Matthew Betts
52              
53             Email: matthew.betts@ii.uib.no
54              
55             =head1 APPENDIX
56              
57             The rest of the documentation details each of the object
58             methods. Internal methods are usually preceded with a _
59              
60             =cut
61              
62             # Let the code begin...
63              
64             package Bio::AlignIO::po;
65 2     2   372 use strict;
  2         2  
  2         52  
66              
67 2     2   472 use Bio::SimpleAlign;
  2         3  
  2         60  
68              
69 2     2   11 use base qw(Bio::AlignIO);
  2         2  
  2         1591  
70              
71              
72             =head2 next_aln
73              
74             Title : next_aln
75             Usage : $aln = $stream->next_aln()
76             Function: returns the next alignment in the stream.
77             Returns : L object - returns undef on end of file
78             or on error
79             Args : NONE
80              
81             =cut
82              
83             sub next_aln {
84 3     3 1 733 my $self = shift;
85              
86 3         5 my $aln;
87             my $entry;
88 0         0 my $name;
89 0         0 my $seqs;
90 0         0 my $seq;
91 0         0 my $nodes;
92 0         0 my $list;
93 0         0 my $node;
94 0         0 my @chars;
95 0         0 my $s;
96 0         0 my $a;
97              
98 3         25 $aln = Bio::SimpleAlign->new();
99              
100             # get to the first 'VERSION' line
101 3         19 while(defined($entry = $self->_readline)) {
102 3 50       20 if($entry =~ /^VERSION=(\S+)/) {
103 3         15 $aln->source($1);
104              
105 3 50 33     7 if(defined($entry = $self->_readline) and $entry =~ /^NAME=(\S+)/) {
106 3         16 $aln->id($1);
107             }
108              
109 3         7 last;
110             }
111             }
112              
113             # read in the sequence names and node data, up to the end of
114             # the file or the next 'VERSION' line, whichever comes first
115 3         6 $seqs = [];
116 3         6 $nodes = [];
117 3         10 while(defined($entry = $self->_readline)) {
118 2682 50       11110 if($entry =~ /^VERSION/) {
    100          
    100          
    100          
119             # start of a new alignment, so...
120 0         0 $self->_pushback($entry);
121 0         0 last;
122             }
123             elsif($entry =~ /^SOURCENAME=(\S+)/) {
124 18         130 $name = $1;
125              
126 18 50       26 if($name =~ /(\S+)\/(\d+)-(\d+)/) {
127 0         0 $seq = Bio::LocatableSeq->new(
128             '-display_id' => $1,
129             '-start' => $2,
130             '-end' => $3,
131             '-alphabet' => $self->alphabet,
132             );
133              
134             } else {
135 18         33 $seq = Bio::LocatableSeq->new('-display_id'=> $name,
136             '-alphabet' => $self->alphabet);
137             }
138              
139             # store sequences in a list initially, because can't guarantee
140             # that will get them back from SimpleAlign object in the order
141             # they were read, and also can't add them to the SimpleAlign
142             # object here because their sequences are currently unknown
143 18         16 push @{$seqs}, {
  18         65  
144             'seq' => $seq,
145             'str' => '',
146             };
147             }
148             elsif($entry =~ /^SOURCEINFO=(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(.*)/) {
149 18         36 $seq->desc($5);
150             }
151             elsif($entry =~ /^(\S):(\S+)/) {
152 2637         8555 $node = {
153             'aa' => $1,
154             'L' => [],
155             'S' => [],
156             'A' => [],
157             'status' => 'unvisited',
158             };
159              
160 2637         2993 $list = $2;
161 2637 50       5673 if($list =~ /^([L\d]*)([S\d]*)([A\d]*)/) {
162 2637         1702 push(@{$node->{'L'}}, split(/L/, $1));
  2637         6104  
163 2637         2003 push(@{$node->{'S'}}, split(/S/, $2));
  2637         5131  
164 2637         1895 push(@{$node->{'A'}}, split(/A/, $3));
  2637         4239  
165              
166 2637 100       1946 (@{$node->{'L'}} > 0) and shift @{$node->{'L'}};
  2625         2417  
  2637         4806  
167 2637 50       2026 (@{$node->{'S'}} > 0) and shift @{$node->{'S'}};
  2637         2142  
  2637         3593  
168 2637 100       1835 (@{$node->{'A'}} > 0) and shift @{$node->{'A'}};
  2313         1994  
  2637         3563  
169             }
170              
171 2637         1938 push @{$nodes}, $node;
  2637         5198  
172             }
173             }
174              
175             # process the nodes
176 3         9 foreach $node (@{$nodes}) {
  3         11  
177 2637 100       3392 ($node->{'status'} ne 'unvisited') and next;
178              
179 1266         1496 @chars = ($aln->gap_char) x @{$seqs}; # char for each seq defaults to a gap
  1266         1751  
180              
181             # set the character for each sequence represented by this node
182 1266         861 foreach $s (@{$node->{'S'}}) {
  1266         1329  
183 2706         2964 $chars[$s] = $node->{'aa'};
184             }
185 1266         865 $node->{'status'} = 'visited';
186              
187             # do the same for each node in the same align ring
188 1266         1725 while(defined($a = $node->{'A'}->[0])) {
189 2313         1597 $node = $nodes->[$a];
190 2313 100       3496 ($node->{'status'} ne 'unvisited') and last;
191              
192 1371         775 foreach $s (@{$node->{'S'}}) {
  1371         1348  
193 1590         1694 $chars[$s] = $node->{'aa'};
194             }
195              
196 1371         1939 $node->{'status'} = 'visited';
197             }
198              
199             # update the sequences
200 1266         710 foreach $seq (@{$seqs}) {
  1266         964  
201 7596         6387 $seq->{'str'} .= shift @chars;
202             }
203             }
204              
205             # set the sequences of the bioperl objects
206             # and add them to the alignment
207 3         7 foreach $seq (@{$seqs}) {
  3         11  
208 18         75 $seq->{'seq'}->seq($seq->{'str'});
209 18         49 $aln->add_seq($seq->{'seq'});
210             }
211              
212             # has an alignment been read?...
213              
214 3 50       14 return $aln if $aln->num_sequences;
215 0         0 return;
216             }
217              
218             =head2 write_aln
219              
220             Title : write_aln
221             Usage : $stream->write_aln(@aln)
222             Function: writes the $aln object into the stream in po format
223             Returns : 1 for success and 0 for error
224             Args : L object
225              
226             =cut
227              
228             sub write_aln {
229 2     2 1 9 my $self = shift;
230 2         5 my @alns = @_;
231              
232 2         3 my $aln;
233             my $seqs;
234 0         0 my $nodes;
235 0         0 my $seq;
236 0         0 my $node;
237 0         0 my $col;
238 0         0 my $ring;
239 0         0 my $i;
240 0         0 my $char;
241              
242 2         5 foreach $aln (@alns) {
243 2 50 33     17 if(!$aln or !$aln->isa('Bio::Align::AlignI')) {
244 0         0 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
245 0         0 next;
246             }
247              
248             # store the seqs on a list, because po format
249             # refers to them by position on this list
250 2         4 $seqs = [];
251 2         7 foreach $seq ($aln->each_seq()) {
252 12         8 push @{$seqs}, {
  12         31  
253             'seq' => $seq,
254             'n_nodes' => 0,
255             'first' => undef,
256             'previous' => undef,
257             };
258             }
259              
260             # go through each column in the alignment and construct
261             # the nodes for the equivalent poa alignment ring
262 2         6 $nodes = [];
263 2         12 for($col = 0; $col < $aln->length; $col++) {
264             $ring = {
265             'nodes' => {},
266 844         726 'first' => scalar @{$nodes},
267 844         773 'last' => scalar @{$nodes},
  844         1619  
268             };
269              
270 844         1326 for($i = 0; $i < @{$seqs}; $i++) {
  5908         7705  
271 5064         4007 $seq = $seqs->[$i];
272              
273 5064         8047 $char = $seq->{'seq'}->subseq($col + 1, $col + 1);
274              
275 5064 100       6674 ($char eq $aln->gap_char) and next;
276              
277 2864 100       4340 if(!defined($node = $ring->{'nodes'}->{$char})) {
278             $node = {
279 1758         1172 'n' => scalar @{$nodes},
  1758         4936  
280             'aa' => $char,
281             'L' => {},
282             'S' => [],
283             'A' => [],
284             };
285              
286             # update the ring
287 1758         2015 $ring->{'nodes'}->{$char} = $node;
288 1758         1392 $ring->{'last'} = $node->{'n'};
289              
290             # add the node to the node list
291 1758         1146 push @{$nodes}, $node;
  1758         1846  
292             }
293              
294             # add the sequence to the node
295 2864         1877 push @{$node->{'S'}}, $i;
  2864         3281  
296              
297             # add the node to the sequence
298 2864 100       3900 defined($seq->{'first'}) or ($seq->{'first'} = $node);
299 2864         1950 $seq->{'n_nodes'}++;
300              
301             # add an edge from the previous node in the sequence to this one.
302             # Then set the previous node to the current one, ready for the next
303             # residue in this sequence
304 2864 100       6288 defined($seq->{'previous'}) and ($node->{'L'}->{$seq->{'previous'}->{'n'}} = $seq->{'previous'});
305 2864         2757 $seq->{'previous'} = $node;
306             }
307              
308             # set the 'next node in ring' field for each node in the ring
309 844 100       1476 if($ring->{'first'} < $ring->{'last'}) {
310 628         956 for($i = $ring->{'first'}; $i < $ring->{'last'}; $i++) {
311 914         591 push @{$nodes->[$i]->{'A'}}, $i + 1;
  914         1813  
312             }
313 628         423 push @{$nodes->[$ring->{'last'}]->{'A'}}, $ring->{'first'};
  628         1499  
314             }
315             }
316              
317             # print header information
318             $self->_print(
319             'VERSION=', ($aln->source and ($aln->source !~ /\A\s*\Z/)) ? $aln->source : 'bioperl', "\n",
320             'NAME=', $aln->id, "\n",
321             'TITLE=', ($seqs->[0]->{'seq'}->description or $aln->id), "\n",
322 2         8 'LENGTH=', scalar @{$nodes}, "\n",
323 2 50 33     12 'SOURCECOUNT=', scalar @{$seqs}, "\n",
  2   33     66  
324             );
325              
326             # print sequence information
327 2         4 foreach $seq (@{$seqs}) {
  2         6  
328             $self->_print(
329             'SOURCENAME=', $seq->{'seq'}->display_id, "\n",
330             'SOURCEINFO=',
331             $seq->{'n_nodes'}, ' ', # number of nodes in the sequence
332             $seq->{'first'}->{'n'}, ' ', # index of first node containing the sequence
333             0, ' ', # FIXME - sequence weight?
334             -1, ' ', # FIXME - index of bundle containing sequence?
335 12   50     26 ($seq->{'seq'}->description or 'untitled'), "\n",
336             );
337             }
338              
339             # print node information
340 2         4 foreach $node (@{$nodes}) {
  2         4  
341 1758         2543 $self->_print($node->{'aa'}, ':');
342 1758 100       1149 (keys %{$node->{'L'}} > 0) and $self->_print('L', join('L', sort {$a <=> $b} keys %{$node->{'L'}}));
  163         477  
  1750         4167  
  1758         4245  
343 1758 50       1594 (@{$node->{'S'}} > 0) and $self->_print('S', join('S', @{$node->{'S'}}));
  1758         3905  
  1758         3039  
344 1758 100       1461 (@{$node->{'A'}} > 0) and $self->_print('A', join('A', @{$node->{'A'}}));
  1542         3108  
  1758         2898  
345 1758         2331 $self->_print("\n");
346             }
347             }
348 2 50 33     18 $self->flush if $self->_flush_on_write && defined $self->_fh;
349              
350 2         2810 return 1;
351             }
352              
353             1;