File Coverage

blib/lib/BioX/Workflow/Plugin/Drake.pm
Criterion Covered Total %
statement 4 6 66.6
branch n/a
condition n/a
subroutine 2 2 100.0
pod n/a
total 6 8 75.0


line stmt bran cond sub pod time code
1             package BioX::Workflow::Plugin::Drake;
2              
3             our $VERSION = '0.10';
4 1     1   27713 use Data::Dumper;
  1         10607  
  1         81  
5 1     1   1580 use Data::Pairs;
  0            
  0            
6              
7             use Moose::Role;
8             #extends 'BioX::Workflow';
9              
10             use Interpolation E => 'eval';
11              
12             =head1 NAME
13              
14             BioX::Workflow::Plugin::Drake - A very opinionated template based bioinformatics workflow writer for Drake.
15              
16             =head1 SYNOPSIS
17              
18             The main documentation for this module is at L<BioX::Workflow>. This module extends BioX::Workflow in order to add functionality for outputting workflows in Drake format.
19              
20             biox-workflow-drake.pl --workflow workflow.yml > workflow.drake
21             drake --workflow workflow.drake #with other functionality such as --jobs for asynchronous output, etc.
22              
23             More information about Drake can be found here: L<https://github.com/Factual/drake>.
24              
25             =head2 Default Variables
26              
27             BioX::Workflow::Plugin::Drake assumes your INPUT/OUTPUT and indir/outdirs are
28             linked.
29              
30             This means the output from step1 is the input for step2.
31              
32             You can override this behavior by either declaring any of these values, or in the global
33             variables set auto_input: 0, disable automatic indir/outdir naming with
34             auto_name: 0, and disable automatically naming outdirectories by rule names with
35             enforce_struct: 0.
36              
37              
38             =head2 Example
39              
40             =head3 workflow.yml
41              
42             ---
43             global:
44             - indir: /home/user/workflow
45             - outdir: /home/user/workflow/output
46             - file_rule: (.csv)$
47             rules:
48             - backup:
49             local:
50             - INPUT: "{$self->indir}/{$sample}.csv"
51             - OUTPUT: "{$self->outdir}/{$sample}.csv"
52             - thing: "other thing"
53             process: |
54             cp $INPUT $OUTPUT
55             - grep_VARA:
56             local:
57             - OUTPUT: "{$self->outdir}/{$sample}.grep_VARA.csv"
58             process: |
59             echo "Working on {$self->{indir}}/{$sample.csv}"
60             grep -i "VARA" {$self->indir}/{$sample}.csv >> {$self->outdir}/{$sample}.grep_VARA.csv \
61             || touch {$self->OUTPUT}
62             - grep_VARB:
63             local:
64             - OUTPUT: "{$self->outdir}/{$sample}.grep_VARA.grep_VARB.csv"
65             process: |
66             grep -i "VARB" {$self->indir}/{$sample}.grep_VARA.csv >> {$self->outdir}/{$sample}.grep_VARA.grep_VARB.csv || touch {$self->OUTPUT}
67              
68             =head3 Notes on the drake.yml
69              
70             Drake will stop everything if your job returns with an exit code of anything
71             besides 0. For this reason we have the last command have a command1 || command2
72             syntax, so that even if we don't grep any "VARB" from the file the workflow
73             could continue.
74              
75             =head3 Run it with default setup
76              
77             biox-workflow-drake.pl --workflow workflow.yml > workflow.full.drake
78              
79             =head3 Output with default setup
80              
81             I don't want to include the whole file, but you get the idea
82              
83             ;
84             ; Generated at: 2015-06-21T11:01:24
85             ; This file was generated with the following options
86             ; --workflow drake.yml
87             ; --min 1
88             ;
89              
90             ;
91             ; Samples: test1, test2
92             ;
93             ;
94             ; Starting Workflow
95             ;
96              
97             ;
98             ; Starting backup
99             ;
100              
101              
102             ;
103             ; Variables
104             ; Indir: /home/guests/jir2004/workflow
105             ; Outdir: /home/guests/jir2004/workflow/output/backup
106             ; Local Variables:
107             ; INPUT: {$self->indir}/{$sample}.csv
108             ; OUTPUT: {$self->outdir}/{$sample}.csv
109             ; thing: other thing
110             ;
111              
112             /home/guests/jir2004/workflow/output/backup/$[SAMPLE].csv <- /home/guests/jir2004/workflow/$[SAMPLE].csv
113             cp $INPUT $OUTPUT
114              
115              
116             ;
117             ; Ending backup
118             ;
119              
120              
121             ;
122             ; Starting grep_VARA
123             ;
124              
125              
126             Run drake
127              
128             drake --workflow workflow.full.drake
129              
130             The following steps will be run, in order:
131             1: /home/user/workflow/output/backup/test1.csv <- /home/user/workflow/test1.csv [timestamped]
132             2: /home/user/workflow/output/backup/test2.csv <- /home/user/workflow/test2.csv [timestamped]
133             3: /home/user/workflow/output/grep_vara/test1.grep_VARA.csv <- /home/user/workflow/output/backup/test1.csv [projected timestamped]
134             4: /home/user/workflow/output/grep_vara/test2.grep_VARA.csv <- /home/user/workflow/output/backup/test2.csv [projected timestamped]
135             5: /home/user/workflow/output/grep_varb/test1.grep_VARA.grep_VARB.csv <- /home/user/workflow/output/grep_vara/test1.grep_VARA.csv [projected timestamped]
136             6: /home/user/workflow/output/grep_varb/test2.grep_VARA.grep_VARB.csv <- /home/user/workflow/output/grep_vara/test2.grep_VARA.csv [projected timestamped]
137             Confirm? [y/n] y
138             Running 6 steps with concurrence of 1...
139              
140             --- 0. Running (timestamped): /home/user/workflow/output/backup/test1.csv <- /home/user/workflow/test1.csv
141             --- 0: /home/user/workflow/output/backup/test1.csv <- /home/user/workflow/test1.csv -> done in 0.02s
142              
143             --- 1. Running (timestamped): /home/user/workflow/output/backup/test2.csv <- /home/user/workflow/test2.csv
144             --- 1: /home/user/workflow/output/backup/test2.csv <- /home/user/workflow/test2.csv -> done in 0.01s
145              
146             --- 2. Running (timestamped): /home/user/workflow/output/grep_vara/test1.grep_VARA.csv <- /home/user/workflow/output/backup/test1.csv
147             Working on /home/user/workflow/output/backup/test1csv
148             --- 2: /home/user/workflow/output/grep_vara/test1.grep_VARA.csv <- /home/user/workflow/output/backup/test1.csv -> done in 0.01s
149              
150             --- 3. Running (timestamped): /home/user/workflow/output/grep_vara/test2.grep_VARA.csv <- /home/user/workflow/output/backup/test2.csv
151             Working on /home/user/workflow/output/backup/test2csv
152             --- 3: /home/user/workflow/output/grep_vara/test2.grep_VARA.csv <- /home/user/workflow/output/backup/test2.csv -> done in 0.01s
153              
154             --- 4. Running (timestamped): /home/user/workflow/output/grep_varb/test1.grep_VARA.grep_VARB.csv <- /home/user/workflow/output/grep_vara/test1.grep_VARA.csv
155             --- 4: /home/user/workflow/output/grep_varb/test1.grep_VARA.grep_VARB.csv <- /home/user/workflow/output/grep_vara/test1.grep_VARA.csv -> done in 0.01s
156              
157             --- 5. Running (timestamped): /home/user/workflow/output/grep_varb/test2.grep_VARA.grep_VARB.csv <- /home/user/workflow/output/grep_vara/test2.grep_VARA.csv
158             --- 5: /home/user/workflow/output/grep_varb/test2.grep_VARA.grep_VARB.csv <- /home/user/workflow/output/grep_vara/test2.grep_VARA.csv -> done in 0.08s
159             Done (6 steps run).
160              
161              
162             =head3 Run in minified mode
163              
164             As an alternative you can run this with the --min option, which instead of
165             printing out each workflow prints out only one, and creates a run-workflow.sh
166             which has all of your environmental variables.
167              
168             This option is preferable if running on an HPC cluster with many nodes.
169              
170             This WILL break with use of --resample, either local or global. You need to
171             split up your workflows as opposed to using the --resample option.
172              
173             biox-workflow-drake.pl --workflow workflow.yml --min 1 > workflow.drake #This also creates the run-workflow.sh in the same directory
174             ./run-workflow.sh
175              
176             cat drake.log #Here is the log for the first run
177              
178             2015-06-21 14:02:47,543 INFO Running 3 steps with concurrence of 1...
179             2015-06-21 14:02:47,568 INFO
180             2015-06-21 14:02:47,570 INFO --- 0. Running (timestamped): /home/user/workflow/output/backup/test1.csv <- /home/user/workflow/test1.csv
181             2015-06-21 14:02:47,592 INFO --- 0: /home/user/workflow/output/backup/test1.csv <- /home/user/workflow/test1.csv -> done in 0.02s
182              
183             #So on and so forth
184              
185             If you look in the example directory you will see a few png files, these are outputs of the drake workflow.
186              
187             =cut
188              
189             =head1 Acknowledgements
190              
191             Before version 0.03
192              
193             This module was originally developed at and for Weill Cornell Medical
194             College in Qatar within ITS Advanced Computing Team. With approval from
195             WCMC-Q, this information was generalized and put on github, for which
196             the authors would like to express their gratitude.
197              
198             As of version 0.03:
199              
200             This module's continuing development is supported by NYU Abu Dhabi in the Center for Genomics and Systems Biology.
201             With approval from NYUAD, this information was generalized and put on bitbucket, for which
202             the authors would like to express their gratitude.
203              
204             =head1 Inline Code Documentation
205              
206             You shouldn't need these, but if you do here they are.
207              
208             =head2 Attributes
209              
210             =cut
211              
212             =head3 full
213              
214             Print the whole workflow hardcoded. This is the default
215              
216             =cut
217              
218             has full => (
219                 is      => 'rw',
220                 isa     => 'Bool',
221                 default => 1,    # on by default; the 'before run' hook clears it when min is set
222             );
223              
224             =head3 min
225              
226             Print the workflow as 2 files.
227              
228             Run the drake things
229              
230             drake --vars "SAMPLE=$sample" --workflow workflow.drake
231              
232             workflow.drake
233              
234             Our regular file
235              
236             =cut
237              
238             has min => (
239                 is      => 'rw',
240                 isa     => 'Bool',
241                 default => 0,    # opt-in; the get_samples hook writes run-workflow.sh only when this is true
242             );
243              
244             =head2 Subroutines
245              
246             Subroutines
247              
248             =head3 before run
249              
250             Must initialize some variables
251              
252             =cut
253              
254             before run => sub {
255                 my ($self) = @_;
256              
257                 # Asking for min output switches the (default) full output off.
258                 $self->full(0) if $self->min;
259              
260                 $self->wait(0);
261                 $self->comment_char(';');    # the generated file uses ';' as its comment prefix
262             };
263              
264             =head3 after get_samples
265              
266             Things to do if we decide to do a min version
267              
268             =cut
269              
270             after 'get_samples' => sub {
271                 my ($self) = @_;
272                 # The wrapper script is only written in min mode.
273                 return unless $self->min;
274                 # die with the reason itself; 'die print ...' would print to STDOUT and die with "1".
275                 open(my $fh, '>', 'run-workflow.sh') or die "Could not open file run-workflow.sh: $!\n";
276              
277                 print $fh "#!/bin/bash\n\n";
278                 # One drake invocation per sample; the sample name is handed over through --vars.
279                 foreach my $sample (@{$self->samples}) {
280                     print $fh <<EOF;
281             drake --vars "SAMPLE=$sample" --workflow workflow.drake
282             EOF
283                 }
284                 # Check close as well: buffered write errors only surface here.
285                 close($fh) or die "Could not close run-workflow.sh: $!\n";
286                 # Executable by everyone, writable by the owner only.
287                 chmod 0755, 'run-workflow.sh';
288                 # From here on the only "sample" is the literal placeholder $SAMPLE.
289                 $self->samples(["\$SAMPLE"]);
290             };
291              
292             =head3 write_process
293              
294             Fill in the template with the process
295              
296             Ensure INPUT/OUTPUT exist
297              
298             Prettify the output a bit
299              
300             =cut
301              
302             before 'write_process' => sub {
303                 my ($self) = @_;
304                 # Only the case where both INPUT and OUTPUT are missing is reported.
305                 my $attr = $self->local_attr;
306                 if (!$attr->exists('INPUT') && !$attr->exists('OUTPUT')) {
307                     print $self->comment_char . " There is no INPUT or OUTPUT!\n";
308                 }
309              
310                 # Make the formatting a bit prettier: re-join the process lines with newline + tab.
311                 my @lines = split("\n", $self->process);
312                 $self->process(join("\n\t", @lines));
313             };
314              
315             before 'process_template' => sub {
316                 my ($self) = @_;
317              
318                 # Rule header "OUTPUT <- INPUT"; the {...} parts are escaped and stay literal here.
319                 my $header = "{\$self->OUTPUT} <- {\$self->INPUT}\n\t";
320                 if ($self->min) {
321                     # NOTE(review): $header holds no literal "$SAMPLE" at this point, so this
322                     # looks like a no-op -- confirm where the $[SAMPLE] rewrite should happen.
323                     $header =~ s/\$SAMPLE/\$[SAMPLE]/g;
324                 }
325                 $self->process($header . $self->process);
326             };
327              
328             1;
329             __END__