File Coverage

Bio/AlignIO/po.pm
Criterion Covered Total %
statement 132 154 85.7
branch 39 50 78.0
condition 6 17 35.2
subroutine 5 5 100.0
pod 2 2 100.0
total 184 228 80.7


line stmt bran cond sub pod time code
1             # $Id: po.pm
2             #
3             # BioPerl module for Bio::AlignIO::po
4              
5             # based on the Bio::AlignIO::fasta module
6             # by Peter Schattner (and others?)
7             #
8             # and the SimpleAlign.pm module of Ewan Birney
9             #
10             # You may distribute this module under the same terms as perl itself
11             #
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::AlignIO::po - po MSA Sequence input/output stream
17              
18             =head1 SYNOPSIS
19              
20             Do not use this module directly. Use it via the L class.
21              
22             =head1 DESCRIPTION
23              
24             This object can transform L objects to and from
25             'po' format flat file databases. 'po' format is the native format of
26             the POA alignment program (Lee C, Grasso C, Sharlow MF, 'Multiple
27             sequence alignment using partial order graphs', Bioinformatics (2002),
28             18(3):452-64).
29              
30             =head1 FEEDBACK
31              
32             =head2 Support
33              
34             Please direct usage questions or support issues to the mailing list:
35              
36             I
37              
38             rather than to the module maintainer directly. Many experienced and
39             reponsive experts will be able look at the problem and quickly
40             address it. Please include a thorough description of the problem
41             with code and data examples if at all possible.
42              
43             =head2 Reporting Bugs
44              
45             Report bugs to the Bioperl bug tracking system to help us keep track
46             the bugs and their resolution. Bug reports can be submitted via the
47             web:
48              
49             https://github.com/bioperl/bioperl-live/issues
50              
51             =head1 AUTHORS - Matthew Betts
52              
53             Email: matthew.betts@ii.uib.no
54              
55             =head1 APPENDIX
56              
57             The rest of the documentation details each of the object
58             methods. Internal methods are usually preceded with a _
59              
60             =cut
61              
62             # Let the code begin...
63              
64             package Bio::AlignIO::po;
65 2     2   441 use strict;
  2         3  
  2         46  
66              
67 2     2   467 use Bio::SimpleAlign;
  2         4  
  2         56  
68              
69 2     2   8 use base qw(Bio::AlignIO);
  2         3  
  2         1421  
70              
71              
72             =head2 next_aln
73              
74             Title : next_aln
75             Usage : $aln = $stream->next_aln()
76             Function: returns the next alignment in the stream.
77             Returns : L object - returns undef on end of file
78             or on error
79             Args : NONE
80              
81             =cut
82              
83             sub next_aln {
84 3     3 1 432 my $self = shift;
85              
86 3         6 my $aln;
87             my $entry;
88 0         0 my $name;
89 0         0 my $seqs;
90 0         0 my $seq;
91 0         0 my $nodes;
92 0         0 my $list;
93 0         0 my $node;
94 0         0 my @chars;
95 0         0 my $s;
96 0         0 my $a;
97              
98 3         19 $aln = Bio::SimpleAlign->new();
99              
100             # get to the first 'VERSION' line
101 3         15 while(defined($entry = $self->_readline)) {
102 3 50       17 if($entry =~ /^VERSION=(\S+)/) {
103 3         11 $aln->source($1);
104              
105 3 50 33     6 if(defined($entry = $self->_readline) and $entry =~ /^NAME=(\S+)/) {
106 3         12 $aln->id($1);
107             }
108              
109 3         6 last;
110             }
111             }
112              
113             # read in the sequence names and node data, up to the end of
114             # the file or the next 'VERSION' line, whichever comes first
115 3         6 $seqs = [];
116 3         4 $nodes = [];
117 3         7 while(defined($entry = $self->_readline)) {
118 2682 50       9456 if($entry =~ /^VERSION/) {
    100          
    100          
    100          
119             # start of a new alignment, so...
120 0         0 $self->_pushback($entry);
121 0         0 last;
122             }
123             elsif($entry =~ /^SOURCENAME=(\S+)/) {
124 18         124 $name = $1;
125              
126 18 50       25 if($name =~ /(\S+)\/(\d+)-(\d+)/) {
127 0         0 $seq = Bio::LocatableSeq->new(
128             '-display_id' => $1,
129             '-start' => $2,
130             '-end' => $3,
131             '-alphabet' => $self->alphabet,
132             );
133              
134             } else {
135 18         44 $seq = Bio::LocatableSeq->new('-display_id'=> $name,
136             '-alphabet' => $self->alphabet);
137             }
138              
139             # store sequences in a list initially, because can't guarantee
140             # that will get them back from SimpleAlign object in the order
141             # they were read, and also can't add them to the SimpleAlign
142             # object here because their sequences are currently unknown
143 18         17 push @{$seqs}, {
  18         72  
144             'seq' => $seq,
145             'str' => '',
146             };
147             }
148             elsif($entry =~ /^SOURCEINFO=(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(.*)/) {
149 18         38 $seq->desc($5);
150             }
151             elsif($entry =~ /^(\S):(\S+)/) {
152 2637         7052 $node = {
153             'aa' => $1,
154             'L' => [],
155             'S' => [],
156             'A' => [],
157             'status' => 'unvisited',
158             };
159              
160 2637         2572 $list = $2;
161 2637 50       5065 if($list =~ /^([L\d]*)([S\d]*)([A\d]*)/) {
162 2637         1631 push(@{$node->{'L'}}, split(/L/, $1));
  2637         5366  
163 2637         1916 push(@{$node->{'S'}}, split(/S/, $2));
  2637         4095  
164 2637         1888 push(@{$node->{'A'}}, split(/A/, $3));
  2637         3403  
165              
166 2637 100       1768 (@{$node->{'L'}} > 0) and shift @{$node->{'L'}};
  2625         2169  
  2637         3790  
167 2637 50       1853 (@{$node->{'S'}} > 0) and shift @{$node->{'S'}};
  2637         2258  
  2637         3304  
168 2637 100       1781 (@{$node->{'A'}} > 0) and shift @{$node->{'A'}};
  2313         1837  
  2637         3214  
169             }
170              
171 2637         1798 push @{$nodes}, $node;
  2637         4339  
172             }
173             }
174              
175             # process the nodes
176 3         7 foreach $node (@{$nodes}) {
  3         10  
177 2637 100       3261 ($node->{'status'} ne 'unvisited') and next;
178              
179 1266         1458 @chars = ($aln->gap_char) x @{$seqs}; # char for each seq defaults to a gap
  1266         1904  
180              
181             # set the character for each sequence represented by this node
182 1266         826 foreach $s (@{$node->{'S'}}) {
  1266         1295  
183 2706         2800 $chars[$s] = $node->{'aa'};
184             }
185 1266         903 $node->{'status'} = 'visited';
186              
187             # do the same for each node in the same align ring
188 1266         1734 while(defined($a = $node->{'A'}->[0])) {
189 2313         1536 $node = $nodes->[$a];
190 2313 100       3561 ($node->{'status'} ne 'unvisited') and last;
191              
192 1371         772 foreach $s (@{$node->{'S'}}) {
  1371         1323  
193 1590         1564 $chars[$s] = $node->{'aa'};
194             }
195              
196 1371         1970 $node->{'status'} = 'visited';
197             }
198              
199             # update the sequences
200 1266         719 foreach $seq (@{$seqs}) {
  1266         986  
201 7596         6118 $seq->{'str'} .= shift @chars;
202             }
203             }
204              
205             # set the sequences of the bioperl objects
206             # and add them to the alignment
207 3         3 foreach $seq (@{$seqs}) {
  3         7  
208 18         50 $seq->{'seq'}->seq($seq->{'str'});
209 18         41 $aln->add_seq($seq->{'seq'});
210             }
211              
212             # has an alignment been read?...
213              
214 3 50       9 return $aln if $aln->num_sequences;
215 0         0 return;
216             }
217              
218             =head2 write_aln
219              
220             Title : write_aln
221             Usage : $stream->write_aln(@aln)
222             Function: writes the $aln object into the stream in po format
223             Returns : 1 for success and 0 for error
224             Args : L object
225              
226             =cut
227              
228             sub write_aln {
229 2     2 1 8 my $self = shift;
230 2         6 my @alns = @_;
231              
232 2         3 my $aln;
233             my $seqs;
234 0         0 my $nodes;
235 0         0 my $seq;
236 0         0 my $node;
237 0         0 my $col;
238 0         0 my $ring;
239 0         0 my $i;
240 0         0 my $char;
241              
242 2         6 foreach $aln (@alns) {
243 2 50 33     13 if(!$aln or !$aln->isa('Bio::Align::AlignI')) {
244 0         0 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
245 0         0 next;
246             }
247              
248             # store the seqs on a list, because po format
249             # refers to them by position on this list
250 2         4 $seqs = [];
251 2         6 foreach $seq ($aln->each_seq()) {
252 12         6 push @{$seqs}, {
  12         28  
253             'seq' => $seq,
254             'n_nodes' => 0,
255             'first' => undef,
256             'previous' => undef,
257             };
258             }
259              
260             # go through each column in the alignment and construct
261             # the nodes for the equivalent poa alignment ring
262 2         4 $nodes = [];
263 2         10 for($col = 0; $col < $aln->length; $col++) {
264             $ring = {
265             'nodes' => {},
266 844         707 'first' => scalar @{$nodes},
267 844         792 'last' => scalar @{$nodes},
  844         1461  
268             };
269              
270 844         1199 for($i = 0; $i < @{$seqs}; $i++) {
  5908         7562  
271 5064         3486 $seq = $seqs->[$i];
272              
273 5064         7534 $char = $seq->{'seq'}->subseq($col + 1, $col + 1);
274              
275 5064 100       6327 ($char eq $aln->gap_char) and next;
276              
277 2864 100       4393 if(!defined($node = $ring->{'nodes'}->{$char})) {
278             $node = {
279 1758         1140 'n' => scalar @{$nodes},
  1758         4512  
280             'aa' => $char,
281             'L' => {},
282             'S' => [],
283             'A' => [],
284             };
285              
286             # update the ring
287 1758         1826 $ring->{'nodes'}->{$char} = $node;
288 1758         1372 $ring->{'last'} = $node->{'n'};
289              
290             # add the node to the node list
291 1758         1092 push @{$nodes}, $node;
  1758         1681  
292             }
293              
294             # add the sequence to the node
295 2864         1733 push @{$node->{'S'}}, $i;
  2864         2825  
296              
297             # add the node to the sequence
298 2864 100       3841 defined($seq->{'first'}) or ($seq->{'first'} = $node);
299 2864         2017 $seq->{'n_nodes'}++;
300              
301             # add an edge from the previous node in the sequence to this one.
302             # Then set the previous node to the current one, ready for the next
303             # residue in this sequence
304 2864 100       6071 defined($seq->{'previous'}) and ($node->{'L'}->{$seq->{'previous'}->{'n'}} = $seq->{'previous'});
305 2864         2549 $seq->{'previous'} = $node;
306             }
307              
308             # set the 'next node in ring' field for each node in the ring
309 844 100       1360 if($ring->{'first'} < $ring->{'last'}) {
310 628         919 for($i = $ring->{'first'}; $i < $ring->{'last'}; $i++) {
311 914         557 push @{$nodes->[$i]->{'A'}}, $i + 1;
  914         1752  
312             }
313 628         432 push @{$nodes->[$ring->{'last'}]->{'A'}}, $ring->{'first'};
  628         1333  
314             }
315             }
316              
317             # print header information
318             $self->_print(
319             'VERSION=', ($aln->source and ($aln->source !~ /\A\s*\Z/)) ? $aln->source : 'bioperl', "\n",
320             'NAME=', $aln->id, "\n",
321             'TITLE=', ($seqs->[0]->{'seq'}->description or $aln->id), "\n",
322 2         4 'LENGTH=', scalar @{$nodes}, "\n",
323 2 50 33     11 'SOURCECOUNT=', scalar @{$seqs}, "\n",
  2   33     47  
324             );
325              
326             # print sequence information
327 2         4 foreach $seq (@{$seqs}) {
  2         6  
328             $self->_print(
329             'SOURCENAME=', $seq->{'seq'}->display_id, "\n",
330             'SOURCEINFO=',
331             $seq->{'n_nodes'}, ' ', # number of nodes in the sequence
332             $seq->{'first'}->{'n'}, ' ', # index of first node containing the sequence
333             0, ' ', # FIXME - sequence weight?
334             -1, ' ', # FIXME - index of bundle containing sequence?
335 12   50     26 ($seq->{'seq'}->description or 'untitled'), "\n",
336             );
337             }
338              
339             # print node information
340 2         6 foreach $node (@{$nodes}) {
  2         5  
341 1758         2077 $self->_print($node->{'aa'}, ':');
342 1758 100       989 (keys %{$node->{'L'}} > 0) and $self->_print('L', join('L', sort {$a <=> $b} keys %{$node->{'L'}}));
  164         378  
  1750         3251  
  1758         3422  
343 1758 50       1317 (@{$node->{'S'}} > 0) and $self->_print('S', join('S', @{$node->{'S'}}));
  1758         3318  
  1758         2582  
344 1758 100       1234 (@{$node->{'A'}} > 0) and $self->_print('A', join('A', @{$node->{'A'}}));
  1542         2599  
  1758         2477  
345 1758         2015 $self->_print("\n");
346             }
347             }
348 2 50 33     10 $self->flush if $self->_flush_on_write && defined $self->_fh;
349              
350 2         2025 return 1;
351             }
352              
353             1;