File Coverage

blib/lib/Bio/Tools/Run/MCS.pm
Criterion Covered Total %
statement 13 15 86.6
branch n/a
condition n/a
subroutine 5 5 100.0
pod n/a
total 18 20 90.0


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::Run::MCS
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Sendu Bala
7             #
8             # Copyright Sendu Bala
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::Run::MCS - Wrapper for MCS
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Tools::Run::MCS;
21              
22             # Make a MCS factory
23             $factory = Bio::Tools::Run::MCS->new();
24              
25             # Run MCS on an alignment
26             my @results = $factory->run($alignfilename);
27              
28             # or with alignment object
29             @results = $factory->run($bio_simplalign);
30              
31             # look at the results
32             foreach my $feat (@results) {
33             my $seq_id = $feat->seq_id;
34             my $start = $feat->start;
35             my $end = $feat->end;
36             my $score = $feat->score;
37             my ($pvalue) = $feat->get_tag_values('pvalue');
38             my ($kind) = $feat->get_tag_values('kind'); # 'all', 'exon' or 'nonexon'
39             }
40              
41             =head1 DESCRIPTION
42              
43             This is a wrapper for running the MCS (binCons) scripts by Elliott H Margulies.
44             You can get details here: http://zoo.nhgri.nih.gov/elliott/mcs_doc/. MCS is used
45             for the prediciton of transcription factor binding sites and other regions of
46             the genome conserved amongst different species.
47              
48             Note that this wrapper assumes you already have alignments, so only uses MCS
49             for the latter stages (the stages involving align2binomial.pl,
50             generate_phyloMAX_score.pl and generate_mcs_beta.pl).
51              
52             You can try supplying normal MCS command-line arguments to new(), eg.
53              
54             $factory->new(-percentile => 95)
55              
56             or calling arg-named methods (excluding the initial
57             hyphens, eg.
58              
59             $factory->percentile(95)
60              
61             to set the --percentile arg).
62              
63              
64             You will need to enable this MCS wrapper to find the MCS scripts.
65             This can be done in (at least) three ways:
66              
67             1. Make sure the MCS scripts are in your path.
68             2. Define an environmental variable MCSDIR which is a
69             directory which contains the MCS scripts:
70             In bash:
71              
72             export MCSDIR=/home/username/mcs/
73              
74             In csh/tcsh:
75              
76             setenv MCSDIR /home/username/mcs
77              
78             3. Include a definition of an environmental variable MCSDIR in
79             every script that will use this MCS wrapper module, e.g.:
80              
81             BEGIN { $ENV{MCSDIR} = '/home/username/mcs/' }
82             use Bio::Tools::Run::MCS;
83              
84             =head1 FEEDBACK
85              
86             =head2 Mailing Lists
87              
88             User feedback is an integral part of the evolution of this and other
89             Bioperl modules. Send your comments and suggestions preferably to
90             the Bioperl mailing list. Your participation is much appreciated.
91              
92             bioperl-l@bioperl.org - General discussion
93             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
94              
95             =head2 Support
96              
97             Please direct usage questions or support issues to the mailing list:
98              
99             I
100              
101             rather than to the module maintainer directly. Many experienced and
102             reponsive experts will be able look at the problem and quickly
103             address it. Please include a thorough description of the problem
104             with code and data examples if at all possible.
105              
106             =head2 Reporting Bugs
107              
108             Report bugs to the Bioperl bug tracking system to help us keep track
109             of the bugs and their resolution. Bug reports can be submitted via
110             the web:
111              
112             http://redmine.open-bio.org/projects/bioperl/
113              
114             =head1 AUTHOR - Sendu Bala
115              
116             Email bix@sendu.me.uk
117              
118             =head1 APPENDIX
119              
120             The rest of the documentation details each of the object methods.
121             Internal methods are usually preceded with a _
122              
123             =cut
124              
125             package Bio::Tools::Run::MCS;
126 1     1   120235 use strict;
  1         2  
  1         23  
127              
128 1     1   3 use Cwd;
  1         1  
  1         49  
129 1     1   4 use File::Spec;
  1         0  
  1         22  
130 1     1   423 use Bio::AlignIO;
  1         87772  
  1         27  
131 1     1   216 use Bio::FeatureIO;
  0            
  0            
132             use Bio::Annotation::SimpleValue;
133              
134             use base qw(Bio::Tools::Run::Phylo::PhyloBase);
135              
136             our $PROGRAM_NAME = 'align2binomial.pl';
137             our $PROGRAM_DIR;
138              
139             # methods for the mcs args we support
140             our @PARAMS = qw(neutral percentile mcs specificity sensitivity name);
141             our @SWITCHES = qw(neg-score);
142              
143             # just to be explicit, args we don't support (yet) or we handle ourselves
144             our @UNSUPPORTED = qw(ucsc gtf neutral-only fourd-align align-only ar);
145              
146             BEGIN {
147             # lets add all the mcs scripts to the path so that when we call
148             # align2binomial.pl it can find its siblings
149             $PROGRAM_DIR = $ENV{'MCSDIR'};
150             $ENV{PATH} = "$PROGRAM_DIR:$ENV{PATH}" if $PROGRAM_DIR;
151             }
152              
153             =head2 program_name
154              
155             Title : program_name
156             Usage : $factory>program_name()
157             Function: holds the program name
158             Returns : string
159             Args : None
160              
161             =cut
162              
163             sub program_name {
164             return $PROGRAM_NAME;
165             }
166              
167             =head2 program_dir
168              
169             Title : program_dir
170             Usage : $factory->program_dir(@params)
171             Function: returns the program directory, obtained from ENV variable.
172             Returns : string
173             Args : None
174              
175             =cut
176              
177             sub program_dir {
178             return $PROGRAM_DIR;
179             }
180              
181             =head2 new
182              
183             Title : new
184             Usage : $factory = Bio::Tools::Run::MCS->new()
185             Function: creates a new MCS factory
186             Returns : Bio::Tools::Run::MCS
187             Args : Many options understood by MCS can be supplied as key =>
188             value pairs.
189              
190             These options can NOT be used with this wrapper:
191             ucsc gtf neutral-only fourd-align align-only ar
192              
193             =cut
194              
195             sub new {
196             my ($class, @args) = @_;
197             my $self = $class->SUPER::new(@args);
198            
199             $self->_set_from_args(\@args, -methods => [@PARAMS, @SWITCHES, 'quiet'],
200             -create => 1);
201            
202             return $self;
203             }
204              
205             =head2 run
206              
207             Title : run
208             Usage : $result = $factory->run($align_file_or_object, Bio::Location::Atomic,
209             [Bio::SeqFeatureI]);
210             Function: Runs the MCS scripts on an alignment.
211             Returns : list of Bio::SeqFeatureI feature objects (with coordinates corrected
212             according to the supplied offset, if any)
213             Args : The first argument represents an alignment, the optional second
214             argument represents the chromosome, stand and end and the optional
215             third argument represents annotation of the exons in the alignment.
216              
217             The alignment can be provided as a multi-fasta format alignment
218             filename, or a Bio::Align::AlignI compliant object (eg. a
219             Bio::SimpleAlign).
220              
221             The position in the genome can be provided as a Bio::Location::Atomic
222             with start, end and seq_id set.
223              
224             The annnotation can be provided as an array of Bio::SeqFeatureI
225             objects.
226              
227             =cut
228              
229             sub run {
230             my ($self, $aln, $offset, $exon_feats) = @_;
231             $self->_alignment($aln || $self->alignment || $self->throw("An alignment must be supplied"));
232            
233             return $self->_run($offset, $exon_feats);
234             }
235              
236             sub _run {
237             my ($self, $atomic, $exon_feats) = @_;
238            
239             my $exe = $self->executable || return;
240            
241             # cd to a temp dir
242             my $temp_dir = $self->tempdir;
243             my $cwd = Cwd->cwd();
244             chdir($temp_dir) || $self->throw("Couldn't change to temp dir '$temp_dir'");
245            
246             my $offset = '';
247             my $start_adjust = 0;
248             if ($atomic) {
249             $start_adjust = $atomic->start;
250             $offset = '--ucsc '.$atomic->seq_id.':'.$start_adjust.'-'.$atomic->end;
251             $start_adjust--;
252             }
253            
254             my $gtf_file = 'exons.gtf';
255             if ($exon_feats) {
256             my $fout = Bio::FeatureIO->new(-file => ">$gtf_file", -format => 'gtf');
257             foreach my $feat (@{$exon_feats}) {
258             $fout->write_feature($feat);
259             }
260             }
261             my $gtf = $exon_feats ? "--gtf $gtf_file" : '';
262            
263             # step '2' (http://zoo.nhgri.nih.gov/elliott/mcs_doc/node1.html) of MCS:
264             # run align2binomial.pl to calculate individual species binomial scores
265             my $aln_file = $self->_write_alignment;
266             my $error_file = 'stderr';
267             my $binomial_file = 'align_name.binomial';
268             my $cmd = "align2binomial.pl $offset $gtf $aln_file > $binomial_file 2> $error_file";
269             #system("rm -fr $cwd/mcs_dir; cp -R $temp_dir $cwd/mcs_dir");
270             my $throw = system($cmd);
271             open(my $efh, "<", $error_file) || $self->throw("Could not open error file '$error_file'");
272             my $error;
273             while (<$efh>) {
274             $error .= $_;
275             $throw = 1 if /not divisible by 3/;
276             }
277             close($efh);
278             $self->throw($error) if $throw;
279            
280             # step '3': run generate_phyloMAX_score.pl to combine the individual
281             # binomial scores and generate the final Multi-species Conservation Score
282             my $phylo_file = 'align_name.phylo';
283             system("generate_phyloMAX_score.pl $binomial_file > $phylo_file") && $self->throw("generate_phyloMAX_score.pl call failed: $?, $!");
284            
285             # step '4': Generate MCSs from the conservation score using
286             # generate_mcs_beta.pl
287             my $mcs_file = 'mcs_result.stdout';
288             my $bed_file = 'align_name.bed'; # hardcoded in generate_mcs_beta.pl
289             system("generate_mcs_beta.pl $offset $gtf $phylo_file > $mcs_file") && $self->throw("generate_mcs_beta.pl failed: $?, $!");
290            
291             my @feats;
292             my $fin = Bio::FeatureIO->new(-file => $bed_file, -format => 'bed');
293             my $source = Bio::Annotation::SimpleValue->new(-value => 'MCS');
294             while (my $feat = $fin->next_feature()) {
295             # convert coords given offset
296             if ($start_adjust) {
297             $feat->start($feat->start + $start_adjust);
298             $feat->end($feat->end + $start_adjust);
299             }
300             $feat->source($source);
301             push(@feats, $feat);
302             }
303            
304             # cd back again
305             chdir($cwd) || $self->throw("Couldn't change back to working directory '$cwd'");
306            
307             return @feats;
308             }
309              
310             =head2 _setparams
311              
312             Title : _setparams
313             Usage : Internal function, not to be called directly
314             Function: Creates a string of params to be used in the command string
315             Returns : string of params
316             Args : none
317              
318             =cut
319              
320             sub _setparams {
321             my $self = shift;
322            
323             my $param_string = $self->SUPER::_setparams(-params => \@PARAMS,
324             -switches => \@SWITCHES,
325             -dash => 1);
326            
327             my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
328             $param_string .= " 1>$null" if $self->quiet;
329            
330             return $param_string;
331             }
332              
333             1;