File Coverage

Bio/AlignIO/meme.pm
Criterion Covered Total %
statement 45 51 88.2
branch 21 28 75.0
condition 7 9 77.7
subroutine 5 6 83.3
pod 2 2 100.0
total 80 96 83.3


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::AlignIO::meme
3             # Based on the Bio::SeqIO modules
4             # by Ewan Birney
5             # and Lincoln Stein
6             # and the SimpleAlign.pm module of Ewan Birney
7             #
8             # Copyright Benjamin Berman
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             =head1 NAME
13              
14             Bio::AlignIO::meme - meme sequence input/output stream
15              
16             =head1 SYNOPSIS
17              
18             Do not use this module directly. Use it via the Bio::AlignIO class.
19              
20             use Bio::AlignIO;
21             # read in an alignment from meme
22             my $in = Bio::AlignIO->new(-format => 'meme',
23             -file => 'meme.out');
24             while( my $aln = $in->next_aln ) {
25             # do something with the alignment
26             }
27              
28             =head1 DESCRIPTION
29              
30             This object transforms the "sites sorted by position p-value" sections
31             of a meme (text) output file into a series of Bio::SimpleAlign
32             objects. Each SimpleAlign object contains Bio::LocatableSeq
33             objects which represent the individual aligned sites as defined by
34             the central portion of the "site" field in the meme file. The start
35             and end coordinates are derived from the "Start" field. See
36             L and L for more information.
37              
38             This module can only parse MEME version 3 and 4. Previous
39             versions have output formats that are more difficult to parse
40             correctly. If the meme output file is not version 3.0 or greater
41             we signal an error.
42              
43             =head1 FEEDBACK
44              
45             =head2 Support
46              
47             Please direct usage questions or support issues to the mailing list:
48              
49             I
50              
51             rather than to the module maintainer directly. Many experienced and
52             reponsive experts will be able look at the problem and quickly
53             address it. Please include a thorough description of the problem
54             with code and data examples if at all possible.
55              
56             =head2 Reporting Bugs
57              
58             Report bugs to the Bioperl bug tracking system to help us keep track
59             the bugs and their resolution. Bug reports can be submitted via the
60             web:
61              
62             https://github.com/bioperl/bioperl-live/issues
63              
64             =head1 AUTHORS - Benjamin Berman
65              
66             Bbased on the Bio::SeqIO modules by Ewan Birney and others
67             Email: benb@fruitfly.berkeley.edu
68              
69             =head1 APPENDIX
70              
71             The rest of the documentation details each of the object
72             methods. Internal methods are usually preceded with an
73             underscore.
74              
75             =cut
76              
77             # Let the code begin...
78              
79             package Bio::AlignIO::meme;
80 1     1   417 use strict;
  1         2  
  1         24  
81 1     1   263 use Bio::LocatableSeq;
  1         2  
  1         28  
82              
83 1     1   5 use base qw(Bio::AlignIO);
  1         40  
  1         283  
84              
85             # Constants
86             my $MEME_VERS_ERR =
87             "MEME output file must be generated by version 3.0 or higher";
88             my $MEME_NO_HEADER_ERR =
89             "MEME output file contains no header line (ex: MEME version 3.0)";
90             my $HTML_VERS_ERR = "MEME output file must be generated with the -text option";
91              
92             =head2 next_aln
93              
94             Title : next_aln
95             Usage : $aln = $stream->next_aln()
96             Function: returns the next alignment in the stream
97             Returns : Bio::SimpleAlign object with the score() set to the evalue of the
98             motif.
99             Args : NONE
100              
101             =cut
102              
103             sub next_aln {
104 3     3 1 1190 my ($self) = @_;
105 3         29 my $aln = Bio::SimpleAlign->new( -source => 'meme' );
106 3         5 my $line;
107 3         6 my $good_align_sec = 0;
108 3         4 my $in_align_sec = 0;
109 3         4 my $evalue;
110 3   66     21 while ( !$good_align_sec && defined( $line = $self->_readline() ) ) {
111 510 100 66     1064 if ( !$in_align_sec ) {
    100 100        
    100          
    50          
112              
113             # Check for the meme header
114 436 100       604 if ( $line =~ /^\s*MEME\s+version\s+(\S+)/ ) {
115 3         68 $self->{'meme_vers'} = $1;
116 3         10 my ($vers) = $self->{'meme_vers'} =~ /^(\d)/;
117 3 50       39 $self->throw($MEME_VERS_ERR) unless ( $vers >= 3 );
118 3         5 $self->{'seen_header'} = 1;
119             }
120              
121             # Check if they've output the HTML version
122 436 50       647 if ( $line =~ /\/i ) {
123 0         0 $self->throw($HTML_VERS_ERR);
124             }
125              
126             # Grab the evalue
127 436 100       557 if ( $line =~ /MOTIF\s+\d+\s+width.+E-value = (\S+)/ ) {
128             $self->throw($MEME_NO_HEADER_ERR)
129 3 50       7 unless ( $self->{'seen_header'} );
130 3         7 $evalue = $1;
131             }
132              
133             # Check if we're going into an alignment section
134 436 100       901 if ( $line =~ /sites sorted by position/ ) {
135             $self->throw($MEME_NO_HEADER_ERR)
136 3 50       5 unless ( $self->{'seen_header'} );
137 3         9 $in_align_sec = 1;
138             }
139             }
140             # The first regexp is for version 3, the second is for version 4
141             elsif ( $line =~ /^(\S+)\s+([+-]?)\s+(\d+)\s+
142             \S+\s+[.A-Z\-]*\s+([A-Z\-]+)\s+
143             ([.A-Z\-]*)/xi
144             ||
145             $line =~ /^(\S+)\s+([+-]?)\s+(\d+)\s+
146             \S+\s+\.\s+([A-Z\-]+)/xi
147             )
148             {
149             # Got a sequence line
150 59         120 my $seq_name = $1;
151 59 100       100 my $strand = ( $2 eq '-' ) ? -1 : 1;
152 59         64 my $start_pos = $3;
153 59         100 my $central = uc($4);
154              
155             # my $p_val = $4;
156             # my $left_flank = uc($5);
157             # my $right_flank = uc($7);
158              
159             # Info about the flanking sequence
160             # my $start_len = ($strand > 0) ? length($left_flank) :
161             # length($right_flank);
162             # my $end_len = ($strand > 0) ? length($right_flank) :
163             # length($left_flank);
164              
165             # Make the sequence. Meme gives the start coordinate at the left
166             # hand side of the motif relative to the INPUT sequence.
167 59         123 my $end_pos = $start_pos + length($central) - 1;
168 59         132 my $seq = Bio::LocatableSeq->new(
169             -seq => $central,
170             -display_id => $seq_name,
171             -start => $start_pos,
172             -end => $end_pos,
173             -strand => $strand,
174             -alphabet => $self->alphabet,
175             );
176              
177             # Add the sequence motif to the alignment
178 59         148 $aln->add_seq($seq);
179             }
180             elsif ( ( $line =~ /^\-/ ) || ( $line =~ /Sequence name/ ) ) {
181              
182             # These are acceptable things to be in the site section
183             }
184             elsif ( $line =~ /^\s*$/ ) {
185              
186             # This ends the site section
187 3         4 $in_align_sec = 0;
188 3         7 $good_align_sec = 1;
189             }
190             else {
191 0         0 $self->warn("Unrecognized format:\n$line");
192 0         0 return;
193             }
194             }
195              
196             # Signal an error if we didn't find a header section
197 3 50       8 $self->throw($MEME_NO_HEADER_ERR) unless ( $self->{'seen_header'} );
198              
199 3 50       7 if ($good_align_sec) {
200 3         10 $aln->score($evalue);
201 3         16 return $aln;
202             }
203              
204 0         0 return;
205             }
206              
207             =head2 write_aln
208              
209             Title : write_aln
210             Usage : $stream->write_aln(@aln)
211             Function: Not implemented
212             Returns : 1 for success and 0 for error
213             Args : Bio::SimpleAlign object
214              
215             =cut
216              
217             sub write_aln {
218 0     0 1 0 my ( $self, @aln ) = @_;
219 0         0 $self->throw_not_implemented();
220             }
221              
222             # ----------------------------------------
223             # - Private methods
224             # ----------------------------------------
225              
226             sub _initialize {
227 3     3   7 my ( $self, @args ) = @_;
228              
229             # Call into our base version
230 3         12 $self->SUPER::_initialize(@args);
231              
232             # Then initialize our data variables
233 3         6 $self->{'seen_header'} = 0;
234             }
235              
236             1;