File Coverage

Bio/AlignIO/largemultifasta.pm
Criterion Covered Total %
statement 64 71 90.1
branch 16 26 61.5
condition 7 14 50.0
subroutine 8 8 100.0
pod 3 3 100.0
total 98 122 80.3


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::AlignIO::largemultifasta
3              
4             # based on the Bio::SeqIO::largefasta module
5             # by Ewan Birney
6             # and Lincoln Stein
7             #
8             # and the SimpleAlign.pm module of Ewan Birney
9             #
10             # Copyright Albert Vilella
11             #
12             # You may distribute this module under the same terms as perl itself
13             # _history
14             # January 20, 2004
15             # POD documentation - main docs before the code
16              
17             =head1 NAME
18              
19             Bio::AlignIO::largemultifasta - Largemultifasta MSA Sequence
20             input/output stream
21              
22             =head1 SYNOPSIS
23              
24             Do not use this module directly. Use it via the L class.
25              
26             =head1 DESCRIPTION
27              
28             This object can transform L objects to and from
29             largemultifasta flat file databases. This is for the fasta sequence
30             format NOT FastA analysis program. To process the pairwise alignments
31             from a FastA (FastX, FastN, FastP, tFastA, etc) use the Bio::SearchIO
32             module.
33              
34             Reimplementation of Bio::AlignIO::fasta modules so that creates
35             temporary files instead of keeping the whole sequences in memory.
36              
37             =head1 FEEDBACK
38              
39             =head2 Support
40              
41             Please direct usage questions or support issues to the mailing list:
42              
43             I
44              
45             rather than to the module maintainer directly. Many experienced and
46             reponsive experts will be able look at the problem and quickly
47             address it. Please include a thorough description of the problem
48             with code and data examples if at all possible.
49              
50             =head2 Reporting Bugs
51              
52             Report bugs to the Bioperl bug tracking system to help us keep track
53             the bugs and their resolution. Bug reports can be submitted via the
54             web:
55              
56             https://github.com/bioperl/bioperl-live/issues
57              
58             =head1 AUTHORS - Albert Vilella, Heikki Lehvaslaiho
59              
60             Email: avilella-at-gmail-dot-com, heikki-at-bioperl-dot-org
61              
62              
63             =head1 APPENDIX
64              
65             The rest of the documentation details each of the object
66             methods. Internal methods are usually preceded with a _
67              
68             =cut
69              
70             # Let the code begin...
71              
72             package Bio::AlignIO::largemultifasta;
73 1     1   438 use strict;
  1         1  
  1         24  
74              
75 1     1   262 use Bio::Seq::LargeLocatableSeq;
  1         3  
  1         27  
76 1     1   255 use Bio::Seq::SeqFactory;
  1         1  
  1         21  
77              
78 1     1   4 use base qw(Bio::AlignIO Bio::SeqIO Bio::SimpleAlign);
  1         1  
  1         292  
79              
80              
81             sub _initialize {
82 2     2   3 my($self,@args) = @_;
83 2         7 $self->SUPER::_initialize(@args);
84 2 50       8 if( ! defined $self->sequence_factory ) {
85 2         5 $self->sequence_factory(Bio::Seq::SeqFactory->new(
86             -verbose => $self->verbose(),
87             -type => 'Bio::Seq::LargeLocatableSeq'
88             ));
89             }
90             }
91              
92             =head2 next_seq
93              
94             Title : next_seq
95             Usage : $seq = $stream->next_seq()
96             Function: returns the next sequence in the stream while taking care
97             of the length
98             Returns : Bio::Seq object
99             Args : NONE
100              
101             =cut
102              
103             sub next_seq {
104 7     7 1 6 my ($self) = @_;
105 7         12 my $largeseq = $self->sequence_factory->create(-alphabet=>$self->alphabet);
106 7         8 my ($id,$fulldesc,$entry);
107 7         5 my $count = 0;
108 7         6 my $seen = 0;
109 7         19 while( defined ($entry = $self->_readline) ) {
110 17 100 100     56 if( $seen == 1 && $entry =~ /^\s*>/ ) {
111 5         12 $self->_pushback($entry);
112 5         11 return $largeseq;
113             }
114 12 50       56 if ( $entry eq '>' ) {
    100          
115 0         0 $seen = 1; next;
  0         0  
116             } elsif( $entry =~ /\s*>(.+?)$/ ) {
117 6         5 $seen = 1;
118 6 50       33 ($id,$fulldesc) = ($1 =~ /^\s*(\S+)\s*(.*)$/)
119             or $self->warn("Can't parse fasta header");
120 6         17 $largeseq->display_id($id);
121 6         10 $largeseq->primary_id($id);
122 6         10 $largeseq->desc($fulldesc);
123             } else {
124 6         18 $entry =~ s/\s+//g;
125 6         13 $largeseq->add_sequence_as_string($entry);
126             }
127 12 50 33     37 (++$count % 1000 == 0 && $self->verbose() > 0) && print "line $count\n";
128             }
129 2 100       6 if( ! $seen ) { return; }
  1         3  
130 1         2 return $largeseq;
131             }
132              
133              
134             =head2 next_aln
135              
136             Title : next_aln
137             Usage : $aln = $stream->next_aln()
138             Function: returns the next alignment in the stream.
139             Returns : L object - returns 0 on end of file
140             or on error
141             Args : NONE
142              
143             =cut
144              
145             sub next_aln {
146 1     1 1 5 my $self = shift;
147 1         1 my $largeseq;
148 1         5 my $aln = Bio::SimpleAlign->new();
149 1         2 while (defined ($largeseq = $self->next_seq) ) {
150 6         15 $aln->add_seq($largeseq);
151 6         10 $self->debug("sequence readed\n");
152             }
153              
154 1         5 my $alnlen = $aln->length;
155 1         3 foreach my $largeseq ( $aln->each_seq ) {
156 6 50       8 if( $largeseq->length < $alnlen ) {
157 0         0 my ($diff) = ($alnlen - $largeseq->length);
158 0         0 $largeseq->seq("-" x $diff);
159             }
160             }
161              
162 1 50       4 return $aln if $aln->num_sequences;
163 0         0 return;
164              
165             }
166              
167             =head2 write_aln
168              
169             Title : write_aln
170             Usage : $stream->write_aln(@aln)
171             Function: writes the $aln object into the stream in largemultifasta format
172             Returns : 1 for success and 0 for error
173             Args : L object
174              
175              
176             =cut
177              
178             sub write_aln {
179 1     1 1 5 my ($self,@aln) = @_;
180 1         2 my ($seq,$desc,$rseq,$name,$count,$length,$seqsub);
181              
182 1         2 foreach my $aln (@aln) {
183 1 50 33     7 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
184 0         0 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
185 0         0 next;
186             }
187 1         2 foreach $rseq ( $aln->each_seq() ) {
188 6         12 $name = $aln->displayname($rseq->get_nse());
189 6         10 $seq = $rseq->seq();
190 6   50     10 $desc = $rseq->description || '';
191 6 50       22 $self->_print (">$name $desc\n") or return ;
192 6         6 $count =0;
193 6         3 $length = length($seq);
194 6         13 while( ($count * 60 ) < $length ) {
195 12         16 $seqsub = substr($seq,$count*60,60);
196 12 50       17 $self->_print ("$seqsub\n") or return ;
197 12         18 $count++;
198             }
199             }
200             }
201 1 50 33     2 $self->flush if $self->_flush_on_write && defined $self->_fh;
202 1         3 return 1;
203             }
204              
205             1;