File Coverage

blib/lib/Bio/Tools/Run/MCS.pm
Criterion Covered Total %
statement 29 83 34.9
branch 1 28 3.5
condition 0 5 0.0
subroutine 11 14 78.5
pod 4 4 100.0
total 45 134 33.5


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::Run::MCS
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Sendu Bala
7             #
8             # Copyright Sendu Bala
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::Run::MCS - Wrapper for MCS
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Tools::Run::MCS;
21              
22             # Make a MCS factory
23             $factory = Bio::Tools::Run::MCS->new();
24              
25             # Run MCS on an alignment
26             my @results = $factory->run($alignfilename);
27              
28             # or with alignment object
29             @results = $factory->run($bio_simplalign);
30              
31             # look at the results
32             foreach my $feat (@results) {
33             my $seq_id = $feat->seq_id;
34             my $start = $feat->start;
35             my $end = $feat->end;
36             my $score = $feat->score;
37             my ($pvalue) = $feat->get_tag_values('pvalue');
38             my ($kind) = $feat->get_tag_values('kind'); # 'all', 'exon' or 'nonexon'
39             }
40              
41             =head1 DESCRIPTION
42              
43             This is a wrapper for running the MCS (binCons) scripts by Elliott H Margulies.
44             You can get details here: http://zoo.nhgri.nih.gov/elliott/mcs_doc/. MCS is used
45             for the prediciton of transcription factor binding sites and other regions of
46             the genome conserved amongst different species.
47              
48             Note that this wrapper assumes you already have alignments, so only uses MCS
49             for the latter stages (the stages involving align2binomial.pl,
50             generate_phyloMAX_score.pl and generate_mcs_beta.pl).
51              
52             You can try supplying normal MCS command-line arguments to new(), eg.
53              
54             $factory->new(-percentile => 95)
55              
56             or calling arg-named methods (excluding the initial
57             hyphens, eg.
58              
59             $factory->percentile(95)
60              
61             to set the --percentile arg).
62              
63              
64             You will need to enable this MCS wrapper to find the MCS scripts.
65             This can be done in (at least) three ways:
66              
67             1. Make sure the MCS scripts are in your path.
68             2. Define an environmental variable MCSDIR which is a
69             directory which contains the MCS scripts:
70             In bash:
71              
72             export MCSDIR=/home/username/mcs/
73              
74             In csh/tcsh:
75              
76             setenv MCSDIR /home/username/mcs
77              
78             3. Include a definition of an environmental variable MCSDIR in
79             every script that will use this MCS wrapper module, e.g.:
80              
81             BEGIN { $ENV{MCSDIR} = '/home/username/mcs/' }
82             use Bio::Tools::Run::MCS;
83              
84             =head1 FEEDBACK
85              
86             =head2 Mailing Lists
87              
88             User feedback is an integral part of the evolution of this and other
89             Bioperl modules. Send your comments and suggestions preferably to
90             the Bioperl mailing list. Your participation is much appreciated.
91              
92             bioperl-l@bioperl.org - General discussion
93             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
94              
95             =head2 Support
96              
97             Please direct usage questions or support issues to the mailing list:
98              
99             I
100              
101             rather than to the module maintainer directly. Many experienced and
102             reponsive experts will be able look at the problem and quickly
103             address it. Please include a thorough description of the problem
104             with code and data examples if at all possible.
105              
106             =head2 Reporting Bugs
107              
108             Report bugs to the Bioperl bug tracking system to help us keep track
109             of the bugs and their resolution. Bug reports can be submitted via
110             the web:
111              
112             http://redmine.open-bio.org/projects/bioperl/
113              
114             =head1 AUTHOR - Sendu Bala
115              
116             Email bix@sendu.me.uk
117              
118             =head1 APPENDIX
119              
120             The rest of the documentation details each of the object methods.
121             Internal methods are usually preceded with a _
122              
123             =cut
124              
125             package Bio::Tools::Run::MCS;
126 1     1   2420 use strict;
  1         2  
  1         24  
127              
128 1     1   4 use Cwd;
  1         2  
  1         47  
129 1     1   5 use File::Spec;
  1         2  
  1         18  
130 1     1   305 use Bio::AlignIO;
  1         75975  
  1         29  
131 1     1   10 use Bio::FeatureIO;
  1         2  
  1         21  
132 1     1   5 use Bio::Annotation::SimpleValue;
  1         3  
  1         19  
133              
134 1     1   4 use base qw(Bio::Tools::Run::Phylo::PhyloBase);
  1         2  
  1         439  
135              
136             our $PROGRAM_NAME = 'align2binomial.pl';
137             our $PROGRAM_DIR;
138              
139             # methods for the mcs args we support
140             our @PARAMS = qw(neutral percentile mcs specificity sensitivity name);
141             our @SWITCHES = qw(neg-score);
142              
143             # just to be explicit, args we don't support (yet) or we handle ourselves
144             our @UNSUPPORTED = qw(ucsc gtf neutral-only fourd-align align-only ar);
145              
146             BEGIN {
147             # lets add all the mcs scripts to the path so that when we call
148             # align2binomial.pl it can find its siblings
149 1     1   4 $PROGRAM_DIR = $ENV{'MCSDIR'};
150 1 50       627 $ENV{PATH} = "$PROGRAM_DIR:$ENV{PATH}" if $PROGRAM_DIR;
151             }
152              
153             =head2 program_name
154              
155             Title : program_name
156             Usage : $factory>program_name()
157             Function: holds the program name
158             Returns : string
159             Args : None
160              
161             =cut
162              
163             sub program_name {
164 7     7 1 28 return $PROGRAM_NAME;
165             }
166              
167             =head2 program_dir
168              
169             Title : program_dir
170             Usage : $factory->program_dir(@params)
171             Function: returns the program directory, obtained from ENV variable.
172             Returns : string
173             Args : None
174              
175             =cut
176              
177             sub program_dir {
178 4     4 1 16 return $PROGRAM_DIR;
179             }
180              
181             =head2 new
182              
183             Title : new
184             Usage : $factory = Bio::Tools::Run::MCS->new()
185             Function: creates a new MCS factory
186             Returns : Bio::Tools::Run::MCS
187             Args : Many options understood by MCS can be supplied as key =>
188             value pairs.
189              
190             These options can NOT be used with this wrapper:
191             ucsc gtf neutral-only fourd-align align-only ar
192              
193             =cut
194              
195             sub new {
196 1     1 1 21201 my ($class, @args) = @_;
197 1         17 my $self = $class->SUPER::new(@args);
198            
199 1         53 $self->_set_from_args(\@args, -methods => [@PARAMS, @SWITCHES, 'quiet'],
200             -create => 1);
201            
202 1         16 return $self;
203             }
204              
205             =head2 run
206              
207             Title : run
208             Usage : $result = $factory->run($align_file_or_object, Bio::Location::Atomic,
209             [Bio::SeqFeatureI]);
210             Function: Runs the MCS scripts on an alignment.
211             Returns : list of Bio::SeqFeatureI feature objects (with coordinates corrected
212             according to the supplied offset, if any)
213             Args : The first argument represents an alignment, the optional second
214             argument represents the chromosome, stand and end and the optional
215             third argument represents annotation of the exons in the alignment.
216              
217             The alignment can be provided as a multi-fasta format alignment
218             filename, or a Bio::Align::AlignI compliant object (eg. a
219             Bio::SimpleAlign).
220              
221             The position in the genome can be provided as a Bio::Location::Atomic
222             with start, end and seq_id set.
223              
224             The annnotation can be provided as an array of Bio::SeqFeatureI
225             objects.
226              
227             =cut
228              
229             sub run {
230 0     0 1   my ($self, $aln, $offset, $exon_feats) = @_;
231 0   0       $self->_alignment($aln || $self->alignment || $self->throw("An alignment must be supplied"));
232            
233 0           return $self->_run($offset, $exon_feats);
234             }
235              
236             sub _run {
237 0     0     my ($self, $atomic, $exon_feats) = @_;
238            
239 0   0       my $exe = $self->executable || return;
240            
241             # cd to a temp dir
242 0           my $temp_dir = $self->tempdir;
243 0           my $cwd = Cwd->cwd();
244 0 0         chdir($temp_dir) || $self->throw("Couldn't change to temp dir '$temp_dir'");
245            
246 0           my $offset = '';
247 0           my $start_adjust = 0;
248 0 0         if ($atomic) {
249 0           $start_adjust = $atomic->start;
250 0           $offset = '--ucsc '.$atomic->seq_id.':'.$start_adjust.'-'.$atomic->end;
251 0           $start_adjust--;
252             }
253            
254 0           my $gtf_file = 'exons.gtf';
255 0 0         if ($exon_feats) {
256 0           my $fout = Bio::FeatureIO->new(-file => ">$gtf_file", -format => 'gtf');
257 0           foreach my $feat (@{$exon_feats}) {
  0            
258 0           $fout->write_feature($feat);
259             }
260             }
261 0 0         my $gtf = $exon_feats ? "--gtf $gtf_file" : '';
262            
263             # step '2' (http://zoo.nhgri.nih.gov/elliott/mcs_doc/node1.html) of MCS:
264             # run align2binomial.pl to calculate individual species binomial scores
265 0           my $aln_file = $self->_write_alignment;
266 0           my $error_file = 'stderr';
267 0           my $binomial_file = 'align_name.binomial';
268 0           my $cmd = "align2binomial.pl $offset $gtf $aln_file > $binomial_file 2> $error_file";
269             #system("rm -fr $cwd/mcs_dir; cp -R $temp_dir $cwd/mcs_dir");
270 0           my $throw = system($cmd);
271 0 0         open(my $efh, "<", $error_file) || $self->throw("Could not open error file '$error_file'");
272 0           my $error;
273 0           while (<$efh>) {
274 0           $error .= $_;
275 0 0         $throw = 1 if /not divisible by 3/;
276             }
277 0           close($efh);
278 0 0         $self->throw($error) if $throw;
279            
280             # step '3': run generate_phyloMAX_score.pl to combine the individual
281             # binomial scores and generate the final Multi-species Conservation Score
282 0           my $phylo_file = 'align_name.phylo';
283 0 0         system("generate_phyloMAX_score.pl $binomial_file > $phylo_file") && $self->throw("generate_phyloMAX_score.pl call failed: $?, $!");
284            
285             # step '4': Generate MCSs from the conservation score using
286             # generate_mcs_beta.pl
287 0           my $mcs_file = 'mcs_result.stdout';
288 0           my $bed_file = 'align_name.bed'; # hardcoded in generate_mcs_beta.pl
289 0 0         system("generate_mcs_beta.pl $offset $gtf $phylo_file > $mcs_file") && $self->throw("generate_mcs_beta.pl failed: $?, $!");
290            
291 0           my @feats;
292 0           my $fin = Bio::FeatureIO->new(-file => $bed_file, -format => 'bed');
293 0           my $source = Bio::Annotation::SimpleValue->new(-value => 'MCS');
294 0           while (my $feat = $fin->next_feature()) {
295             # convert coords given offset
296 0 0         if ($start_adjust) {
297 0           $feat->start($feat->start + $start_adjust);
298 0           $feat->end($feat->end + $start_adjust);
299             }
300 0           $feat->source($source);
301 0           push(@feats, $feat);
302             }
303            
304             # cd back again
305 0 0         chdir($cwd) || $self->throw("Couldn't change back to working directory '$cwd'");
306            
307 0           return @feats;
308             }
309              
310             =head2 _setparams
311              
312             Title : _setparams
313             Usage : Internal function, not to be called directly
314             Function: Creates a string of params to be used in the command string
315             Returns : string of params
316             Args : none
317              
318             =cut
319              
320             sub _setparams {
321 0     0     my $self = shift;
322            
323 0           my $param_string = $self->SUPER::_setparams(-params => \@PARAMS,
324             -switches => \@SWITCHES,
325             -dash => 1);
326            
327 0 0         my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
328 0 0         $param_string .= " 1>$null" if $self->quiet;
329            
330 0           return $param_string;
331             }
332              
333             1;