File Coverage

blib/lib/BioX/Wrapper/Annovar.pm
Criterion Covered Total %
statement 24 197 12.1
branch 0 40 0.0
condition 0 3 0.0
subroutine 8 23 34.7
pod n/a
total 32 263 12.1


line stmt bran cond sub pod time code
1             package BioX::Wrapper::Annovar;
2              
3             #use 5.006;
4              
5 1     1   48166 use Moose;
  1         903924  
  1         12  
6 1     1   12073 use File::Find::Rule;
  1         9332  
  1         11  
7 1     1   66 use File::Basename;
  1         7  
  1         79  
8 1     1   6 use File::Path qw(make_path remove_tree);
  1         1  
  1         46  
9 1     1   4 use File::Find::Rule;
  1         1  
  1         4  
10 1     1   24 use Cwd;
  1         1  
  1         41  
11 1     1   4538 use IO::Uncompress::Gunzip;
  1         121945  
  1         52  
12 1     1   2820 use Data::Dumper;
  1         5369  
  1         2429  
13              
14             require Vcf;
15              
16             extends 'BioX::Wrapper';
17             with 'MooseX::Getopt';
18             with 'MooseX::Getopt::Usage';
19              
20             =head1 NAME
21              
22             BioX::Wrapper::Annovar - A wrapper around the annovar annotation pipeline
23              
24             =head1 VERSION
25              
26             Version 0.06
27              
28             =cut
29              
30             our $VERSION = '0.40';
31              
32              
33             =head1 SYNOPSIS
34              
35             annovar-wrapper.pl --vcfs file1.vcf,file2.vcf --annovardb_path /path/to/annovar/dbs
36              
37             This module is a wrapper around the popular annotation tool, annovar. http://www.openbioinformatics.org/annovar/ . The commands generated are taken straight from the documentation. In addition, there is an option to reannotate using vcf-annotate from vcftools.
38              
39             It takes as its input a list or directory of vcf files, bgzipped and tabixed or not, and uses annovar to create annotation files. These multianno table files can be optionally reannotated into the vcf file. This script does not actually execute any commands, only writes them to STDOUT for the user to run as they wish.
40              
41             It comes with an executable script annovar-wrapper.pl. This should be sufficient for most of your needs, but if you wish to overwrite methods you can always do so in the usual Moose fashion.
42              
43             #!/usr/bin/env perl
44              
45             package Main;
46              
47             use Moose;
48             extends 'BioX::Wrapper::Annovar';
49              
50             BioX::Wrapper::Annovar->new_with_options->run;
51              
52             sub method_to_override {
53             my $self = shift;
54              
55             #dostuff
56             };
57              
58             before 'method' => sub {
59             my $self = shift;
60              
61             #dostuff
62             };
63              
64             has '+variable' => (
65             #things to add to variable declaration
66             );
67              
68             #or
69              
70             has 'variable' => (
71             #override variable declaration
72             );
73              
74             1;
75              
76             Please see the Moose::Manual::MethodModifiers for more information.
77              
78             =head1 Prerequisites
79              
80             This module requires the annovar download. The easiest thing to do is to put the annovar scripts in your ENV{PATH}, but if you choose not to do this you can also pass in the location with
81              
82             annovar-wrapper.pl --tableannovar_path /path/to/table_annovar.pl --convert2annovar_path /path/to/convert2annovar.pl
83              
84             It requires Vcf.pm, which comes with vcftools.
85              
86             Vcftools is publicly available for download. http://vcftools.sourceforge.net/.
87              
88             export PERL5LIB=$PERL5LIB:path_to_vcftools/perl
89              
90             If you wish to you reannotate the vcf file you need to have bgzip and tabix installed, and have the executables in vcftools in your path.
91              
92             export PATH=$PATH:path_to_vcftools
93              
94             =head1 Generate an Example
95              
96             To generate an example you can run the following commands
97              
98             tabix -h ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20100804/ALL.2of4intersection.20100804.genotypes.vcf.gz 2:39967768-40000000 > test.vcf
99             bgzip test.vcf
100             tabix test.vcf.gz
101             vcf-subset -c HG00098,HG00100,HG00106,HG00112,HG00114 test.vcf.gz | bgzip -c > out.vcf.gz
102             tabix out.vcf.gz
103             rm test.vcf.gz
104             rm test.vcf.gz.tbi
105              
106             annovar-wrapper.pl --vcfs out.vcf.gz --annovar_dbs refGene --annovar_fun g --outdir annovar_out --annovardb_path /path/to/annovar/dbs > my_cmds.sh
107              
108             There is more detail on the example in the pod files.
109              
110             =cut
111              
112             =head1 Variables
113              
114             =cut
115              
116             =head2 Annovar Options
117              
118             =cut
119              
120             =head3 tableannovar_path
121              
122             You can put the location of the annovar scripts in your ENV{PATH}, and the default is fine. If annovar is not in your PATH, please supply the location.
123              
124             =cut
125              
126             has 'tableannovar_path' => (
127             is => 'rw',
128             isa => 'Str',
129             required => 1,
130             default => "table_annovar.pl"
131             );
132              
133             =head3 convert2annovar_path
134              
135             You can put the location of the annovar scripts in your ENV{PATH}, and the default is fine. If annovar is not in your PATH, please supply the location
136              
137             =cut
138              
139             has 'convert2annovar_path' => (
140             is => 'rw',
141             isa => 'Str',
142             required => 1,
143             default => "convert2annovar.pl"
144             );
145              
146             =head3 annovardb_path
147              
148             Path to your annovar databases
149              
150             =cut
151              
152             has 'annovardb_path' => (
153             is => 'rw',
154             isa => 'Str',
155             default => '/data/apps/software/annovar/hg19',
156             required => 1,
157             );
158              
159             =head3 buildver
160              
161             Its probably hg19 or hg18
162              
163             =cut
164              
165             has 'buildver' => (
166             is => 'rw',
167             isa => 'Str',
168             default => 'hg19',
169             required => 1,
170             );
171              
172             =head3 convert2annovar_opts
173              
174             Assumes vcf version 4 and that you want to convert all samples
175              
176             Not using --allsample on a multisample vcf is untested and will probably break the whole pipeline
177              
178             =cut
179              
180             has 'convert2annovar_opts' => (
181             is => 'rw',
182             isa => 'Str',
183             default => '-format vcf4 --allsample',
184             );
185              
186             =head3 annovar_dbs
187              
188             These are pretty much all the databases listed on
189              
190             http://www.openbioinformatics.org/annovar/annovar_download.html for hg19 that I tested as working
191              
192             #Download databases with
193              
194             cd path_to_annovar_dir
195             ./annotate_variation.pl --buildver hg19 -downdb -webfrom annovar esp6500si_aa hg19/
196              
197             #Option is an ArrayRef, and can be given as either
198              
199             --annovar_dbs cg46,cg69,nci60
200              
201             #or
202              
203             --annovar_dbs cg46 --annovar_dbs cg69 --annovar_dbs nci60
204              
205             =cut
206              
207             #TODO
208             #Add in a hashref so I don't have to remember the list
209             #The following are redundant within other databases
210             #esp are in popfreq_all
211             #esp6500si_aa
212             #esp6500si_ea
213             #ljb23 are in ljb23_all
214             #ljb23_fathmm
215             #ljb23_gerp++
216             #ljb23_lrt
217             #ljb23_ma
218             #ljb23_metalr
219             #ljb23_metasvm
220             #ljb23_mt
221             #ljb23_phylop
222             #ljb23_pp2hdiv
223             #ljb23_pp2hvar
224             #ljb23_sift
225             #ljb23_siphy
226             #ljb2_pp2hvar
227             #Leaving out cg46
228             #
229             ## The following have been tested
230             # snp138NonFlagged,snp138,popfreq_all,cg69,cosmic68wgs,clinvar_20140211,gwasCatalog,caddgt20,phastConsElements46way,gerp++elem,wgEncodeBroadHmmGm12878HMM,wgEncodeUwDnaseGm12878HotspotsRep2,ljb23_all,refGene
231              
232             has 'annovar_dbs' => (
233             is => 'rw',
234             isa => 'ArrayRef',
235             required => 0,
236             default => sub {
237             return [qw(snp138NonFlagged
238             snp138
239             popfreq_all
240             cg69
241             cosmic68wgs
242             clinvar_20140211
243             gwasCatalog
244             caddgt20
245             phastConsElements46way
246             gerp++elem
247             wgEncodeBroadHmmGm12878HMM
248             wgEncodeUwDnaseGm12878HotspotsRep2
249             ljb23_all
250             refGene
251             )]
252             }
253             );
254              
255             =head3 annovar_fun
256              
257             Functions of the individual databases can be found at
258              
259             What function your DB may already be listed otherwise it is probably listed in the URLS under Annotation: Gene-Based, Region-Based, or Filter-Based
260              
261             Functions must be given in the corresponding order of your annovar_dbs
262              
263             #Option is an ArrayRef, and can be given as either
264              
265             --annovar_fun f,f,g
266              
267             #or
268              
269             --annovar_fun f --annovar_fun f --annovar_fun g
270              
271             =cut
272              
273             has 'annovar_fun' => (
274             is => 'rw',
275             isa => 'ArrayRef',
276             required => 0,
277             default => sub {
278             return [qw(f
279             f
280             f
281             f
282             f
283             f
284             r
285             f
286             r
287             f
288             r
289             r
290             f
291             g)]
292             }
293             );
294              
295             =head3 annovar_cols
296              
297             Some database annotations generate multiple columns. For reannotating the vcf we need to know what these columns are. Below are the columns generated for the databases given in annovar_dbs
298              
299             To add give a hashref of array
300              
301             =cut
302              
303             has 'annovar_cols' => (
304             is => 'rw',
305             isa => 'HashRef',
306             required => 0,
307             default => sub {
308             my $href = {};
309             #Old table_annovar.pl script
310             # $href->{popfreq_max} = ["PopFreqMax"];
311             # $href->{popfreq_all} = ["PopFreqMax","1000G2012APR_ALL","1000G2012APR_AFR","1000G2012APR_AMR","1000G2012APR_ASN","1000G2012APR_EUR","ESP6500si_AA","ESP6500si_EA","CG46","NCI60","SNP137","COSMIC65","DISEASE"];
312             # $href->{refGene} = ["Func.refGene","Gene.refGene","ExonicFunc.refGene","AAChange.refGene"];
313             # $href->{ljb_all} = [ qw/LJB_PhyloP LJB_PhyloP_Pred LJB_SIFT LJB_SIFT_Pred LJB_PolyPhen2 LJB_PolyPhen2_Pred LJB_LRT LJB_LRT_Pred LJB_MutationTaster LJB_MutationTaster_Pred LJB_GERP++/ ];
314             # $href->{ljb2_all} = [ qw/LJB2_SIFT LJB2_PolyPhen2_HDIV LJB2_PP2_HDIV_Pred LJB2_PolyPhen2_HVAR LJB2_PolyPhen2_HVAR_Pred LJB2_LRT LJB2_LRT_Pred LJB2_MutationTaster LJB2_MutationTaster_Pred LJB_MutationAssessor LJB_MutationAssessor_Pred LJB2_FATHMM LJB2_GERP++ LJB2_PhyloP LJB2_SiPhy/ ];
315             # $href->{ljb23_all} = [ qw/LJB23_SIFT_score LJB23_SIFT_score_converted LJB23_SIFT_pred LJB23_Polyphen2_HDIV_score LJB23_Polyphen2_HDIV_pred LJB23_Polyphen2_HVAR_score LJB23_Polyphen2_HVAR_pred LJB23_LRT_score LJB23_LRT_score_converted LJB23_LRT_pred LJB23_MutationTaster_score LJB23_MutationTaster_score_converted LJB23_MutationTaster_pred LJB23_MutationAssessor_score LJB23_MutationAssessor_score_converted LJB23_MutationAssessor_pred LJB23_FATHMM_score LJB23_FATHMM_score_converted LJB23_FATHMM_pred LJB23_RadialSVM_score LJB23_RadialSVM_score_converted LJB23_RadialSVM_pred LJB23_LR_score LJB23_LR_pred LJB23_GERP++ LJB23_PhyloP LJB23_SiPhy/ ];
316              
317             # #Which one?
318             # #$href->{refGene} = ["Func.refGene","Gene.refGene", "ExonicFunc.refGene","AAChange.refGene"];
319             $href->{refGene} = ["Func.refGene","Gene.refGene","GeneDetail.refGene", "ExonicFunc.refGene","AAChange.refGene"];
320             $href->{ljb_all} = [ qw/LJB_PhyloP LJB_PhyloP_Pred LJB_SIFT LJB_SIFT_Pred LJB_PolyPhen2 LJB_PolyPhen2_Pred LJB_LRT LJB_LRT_Pred LJB_MutationTaster LJB_MutationTaster_Pred LJB_GERPPP/ ];
321             $href->{ljb2_all} = [ qw/LJB2_SIFT LJB2_PolyPhen2_HDIV LJB2_PP2_HDIV_Pred LJB2_PolyPhen2_HVAR LJB2_PolyPhen2_HVAR_Pred LJB2_LRT LJB2_LRT_Pred LJB2_MutationTaster LJB2_MutationTaster_Pred LJB_MutationAssessor LJB_MutationAssessor_Pred LJB2_FATHMM LJB2_GERPPP LJB2_PhyloP LJB2_SiPhy/ ];
322             $href->{ljb23_all} = [ qw/LJB23_SIFT_score LJB23_SIFT_score_converted LJB23_SIFT_pred LJB23_Polyphen2_HDIV_score LJB23_Polyphen2_HDIV_pred LJB23_Polyphen2_HVAR_score LJB23_Polyphen2_HVAR_pred LJB23_LRT_score LJB23_LRT_score_converted LJB23_LRT_pred LJB23_MutationTaster_score LJB23_MutationTaster_score_converted LJB23_MutationTaster_pred LJB23_MutationAssessor_score LJB23_MutationAssessor_score_converted LJB23_MutationAssessor_pred LJB23_FATHMM_score LJB23_FATHMM_score_converted LJB23_FATHMM_pred LJB23_RadialSVM_score LJB23_RadialSVM_score_converted LJB23_RadialSVM_pred LJB23_LR_score LJB23_LR_pred LJB23_GERPPP LJB23_PhyloP LJB23_SiPhy/ ];
323             $href->{popfreq_all} = [ qw/PopFreqMax 1000G2012APR_ALL 1000G2012APR_AFR 1000G2012APR_AMR 1000G2012APR_ASN 1000G2012APR_EUR ESP6500si_ALL ESP6500si_AA ESP6500si_EA CG46/ ];
324             return $href;
325             }
326             );
327              
328             =head3 vcfs
329              
330             VCF files can be given individually as well.
331              
332             #Option is an ArrayRef and can be given as either
333              
334             --vcfs 1.vcf,2.vcf,3.vcfs
335              
336             #or
337              
338             --vcfs 1.vcf --vcfs 2.vcf --vcfs 3.vcf
339              
340             Don't mix the methods
341              
342             =cut
343              
344             has 'vcfs' => (
345             is => 'rw',
346             isa => 'ArrayRef',
347             required => 0,
348             );
349              
350              
351              
352             =head3 annotate_vcf
353              
354             Use vcf-annotate from VCF tools to annotate the VCF file
355              
356             This does not overwrite the original VCF file, but instead creates a new one
357              
358             To turn this off
359              
360             annovar-wrapper.pl --annotate_vcf 0
361              
362             =cut
363              
364             has 'annotate_vcf' => (
365             is => 'rw',
366             isa => 'Bool',
367             default => 1,
368             required => 1,
369             );
370              
371             =head2 Internal Variables
372              
373             You shouldn't need to change these
374              
375             =cut
376              
377             has 'samples' => (
378             is => 'rw',
379             isa => 'HashRef',
380             required => 0,
381             default => sub { return {} },
382             );
383              
384             has 'orig_samples' => (
385             is => 'rw',
386             isa => 'HashRef',
387             required => 0,
388             default => sub { return {} },
389             );
390              
391             has 'file' => (
392             traits => ['String'],
393             is => 'rw',
394             isa => 'Str',
395             required => 0,
396             default => sub { return "" },
397             handles => {
398             match_file => 'match',
399             },
400             );
401              
402             has 'fname' => (
403             is => 'rw',
404             isa => 'Str',
405             required => 0,
406             default => sub { return "" },
407             );
408              
409             =head1 SUBROUTINES/METHODS
410              
411             =cut
412              
413             =head2 run
414              
415             Subroutine that starts everything off
416              
417             =cut
418              
419             sub run {
420 0     0     my($self) = @_;
421              
422 0           $self->print_opts;
423              
424 0           $self->check_files;
425 0           $self->parse_commands;
426 0           $self->find_vcfs;
427 0           $self->write_annovar;
428             }
429              
430             #Moving this over to BioX::Wrapper base class
431             #=head2 print_opts
432              
433             #Print out the command line options
434              
435             #=cut
436              
437             #sub print_opts {
438             #my($self) = @_;
439              
440             #print "## This file was generated with the options\n";
441             #for(my $x=0; $x<=$#ARGV; $x++){
442             #next unless $ARGV[$x];
443             #print "#\t$ARGV[$x]\t".$ARGV[$x+1]."\n";
444             #$x++;
445             #}
446             #print "\n";
447             #}
448              
449             =head2 check_files
450              
451             Check to make sure either an indir or vcfs are supplied
452              
453             =cut
454              
455             sub check_files{
456 0     0     my($self) = @_;
457 0           my($t);
458              
459 0 0 0       die print "Must specificy an indirectory or vcfs!\n" if (!$self->indir && !$self->vcfs);
460              
461 0 0         if($self->indir){
462 0           $t = $self->indir;
463 0           $t =~ s/\/$//g;
464 0           $self->indir($t);
465             }
466              
467 0           $t = $self->outdir;
468 0           $t =~ s/\/$//g;
469 0           $t = $t."/annovar-wrapper";
470 0           $self->outdir($t);
471              
472             #make the outdir
473 0 0         make_path($self->outdir) if ! -d $self->outdir;
474 0 0         make_path($self->outdir."/annovar_interim") if ! -d $self->outdir."/annovar_interim";
475 0 0         make_path($self->outdir."/annovar_final") if ! -d $self->outdir."/annovar_final";
476             }
477              
478             =head2 find_vcfs
479              
480             Use File::Find::Rule to find the vcfs
481              
482             =cut
483              
484             sub find_vcfs{
485 0     0     my($self) = @_;
486              
487 0 0         return if $self->vcfs;
488 0           $self->vcfs([]);
489              
490 0           my $rule = File::Find::Rule->file->name(qr/(vcf|vcf\.gz)$/)->start( $self->indir);
491 0           while ( defined ( my $file = $rule->match ) ) {
492 0           push(@{$self->vcfs}, $file);
  0            
493             }
494              
495 0 0         die print "No vcfs were found!\n" unless $self->vcfs;
496             }
497              
498             =head2 parse_commands
499              
500             Allow for giving ArrayRef either in the usual fashion or with commas
501              
502             =cut
503              
504             sub parse_commands{
505 0     0     my($self) = @_;
506              
507 0 0         if($#{$self->annovar_dbs} == 0){
  0            
508 0           my @tmp = split(",", $self->annovar_dbs->[0]);
509 0           $self->annovar_dbs(\@tmp);
510             }
511 0 0         if($#{$self->annovar_fun} == 0){
  0            
512 0           my @tmp = split(",", $self->annovar_fun->[0]);
513 0           $self->annovar_fun(\@tmp);
514             }
515              
516 0 0         return unless $self->vcfs;
517 0 0         if($#{$self->vcfs} == 0){
  0            
518 0           my @tmp = split(",", $self->vcfs->[0]);
519 0           $self->vcfs(\@tmp);
520             }
521             }
522              
523             =head2 write_annovar
524              
525             Write the commands that
526              
527             Convert the vcf file to annovar input
528             Do the annotations
529             Reannotate the vcf - if you want
530              
531             =cut
532              
533             sub write_annovar{
534 0     0     my($self) = @_;
535              
536 0 0         die print "Dbs are ".scalar @{$self->annovar_dbs}."\nand Funcs are ".scalar @{$self->annovar_fun}." \n" unless scalar @{$self->annovar_dbs} == scalar @{$self->annovar_fun};
  0            
  0            
  0            
  0            
537              
538             # #Convert vcf to annovar
539 0           print "## Converting to annovar input\n\n";
540 0           foreach my $file (@{$self->vcfs}){
  0            
541 0           print "## Processing file $file\n\n";
542              
543 0           $self->file($file);
544 0           my $tname = basename($self->file);
545 0           $tname =~ s/\.vcf$|\.vcf\.gz$//;
546 0           $self->fname($tname);
547             # $self->fname(basename($self->file));
548 0           $self->get_samples;
549 0           $self->convert_annovar;
550              
551             }
552 0           print "## Wait for all convert commands to complete\n";
553 0           print "wait\n\n";
554              
555             # #Annotate annovar input
556              
557 0           $self->iter_vcfs('table_annovar');
558              
559 0           print "## Wait for all table commands to complete\n";
560 0           print "wait\n\n";
561              
562             # #Annotate the VCF
563             # #This is only relevant for putting it back into vcfs
564              
565 0 0         return unless $self->annotate_vcf;
566              
567 0           $self->iter_vcfs('gen_descr');
568              
569 0           print "## Wait for file copying bgzip and tabix to finish...\n";
570 0           print "wait\n\n";
571              
572 0           $self->iter_vcfs('gen_annot');
573              
574 0           print "## Wait for all vcf-annotate commands to complete\n";
575 0           print "wait\n\n";
576              
577 0           $self->iter_vcfs('merge_vcfs');
578              
579             }
580              
581             =head2 iter_vcfs
582              
583             Iterate over the vcfs with some changes for lookups
584              
585             =cut
586              
587             sub iter_vcfs{
588             # my $self = shift;
589 0     0     my($self, $fun) = @_;
590              
591 0           foreach my $file (@{$self->vcfs}){
  0            
592 0           $self->file($file);
593 0           my $tname = basename($self->file);
594 0           $tname =~ s/\.vcf$|\.vcf\.gz$//;
595 0           $self->fname($tname);
596             # $self->fname(basename($self->file));
597             #Make this a parameter of the script
598 0           $self->$fun;
599             }
600              
601             }
602              
603             =head2 get_samples
604              
605             Using VCF tools get the samples listed per vcf file
606              
607             Supports files that are bgzipped or not
608              
609             Sample names are stripped of all non alphanumeric characters.
610              
611             =cut
612              
613             sub get_samples{
614 0     0     my($self) = @_;
615              
616 0           my(@samples, $vcf, $out, $fh);
617              
618             #This doesn't work in large vcf files, end up with funny blocks
619             # if($self->match_file(qr/gz$/)){
620             # $fh = new IO::Uncompress::Gunzip $self->file or warn "## File handle didn't work for gzipped file ".$self->file."\n\n";
621             # }
622             # else{
623             # $fh = new IO::File $self->file, "r" or warn "## File handle didn't work for ".$self->file."\n\n";
624             # }
625              
626             # next unless $fh;
627              
628 0           $vcf = Vcf->new(file => $self->file);
629 0           $vcf->parse_header();
630 0           (@samples) = $vcf->get_samples();
631              
632             # #TODO Have this in a proper debug msg
633             # print "##Before transform samples are :\n##".join("\n##", @samples)."\n";
634              
635             #Keep the original samples names for subsetting the vcf
636 0           my(@tmp) = @samples;
637 0           $self->orig_samples->{$self->file} = \@tmp;
638              
639             #Must keep this the same as annovar!
640             # #I think annovar got rid of these in the most recent implementation
641             # 2014-12-20 Downloaded new annovar
642             # @samples = map { s/[^A-Za-z0-9\-\.]//g; $_ } @samples;
643             # @samples = map { s/^\.//g; $_ } @samples;
644              
645             # #TODO Have this in a proper debug msg
646             # print "##After transform samples are :\n##".join("\n##", @samples)."\n";
647 0           $vcf->close();
648              
649             # #TODO Put a warning msg here
650 0 0         die print "There are no samples!\n" unless @samples;
651              
652 0           $self->samples->{$self->file} = \@samples;
653              
654 0           print "##Original samples names are :\n##".join("\n##", @{$self->orig_samples->{$self->file}})."\n";
  0            
655 0           print "##Annovar samples names are :\n##".join("\n##", @{$self->samples->{$self->file}})."\n";
  0            
656             }
657              
658             =head2 convert_annovar
659              
660             Print out the command to print the convert2annovar commands
661              
662             =cut
663              
664             sub convert_annovar{
665 0     0     my($self) = @_;
666              
667 0           print $self->convert2annovar_path." ".$self->convert2annovar_opts." ".$self->file." \\\n--outfile ".$self->outdir."/".$self->fname.".annovar\n\n";
668             }
669              
670             =head2 table_annovar
671              
672             Print out the commands to generate the annotation using table_annovar.pl command.
673              
674             =cut
675              
676             sub table_annovar{
677 0     0     my($self) = @_;
678              
679 0           print "## Generating annotations\n\n";
680 0           foreach my $sample (@{$self->samples->{$self->file}}){
  0            
681 0           print "## Processing sample $sample\n";
682 0           print $self->tableannovar_path." ".$self->outdir."/".$self->fname.".annovar.$sample.avinput \\\n ".$self->annovardb_path." --buildver ".$self->buildver." \\\n -protocol ".join(",", @{$self->annovar_dbs})." \\\n -operation ".join(",", @{$self->annovar_fun})." \\\n -nastring NA --outfile ".$self->outdir."/".$self->fname.".annovar.$sample \\\n";
  0            
  0            
683 0           print "&& find ".$self->outdir."/ |grep ".$self->outdir."/".$self->fname.".annovar.$sample | grep -v \"multianno\" | xargs -i -t mv {} ".$self->outdir."/annovar_interim \\\n";
684 0           print "&& find ".$self->outdir."/ |grep ".$self->outdir."/".$self->fname.".annovar.$sample | grep \"avinput\$\" | xargs -i -t mv {} ".$self->outdir."/annovar_interim \\\n";
685 0           print "&& find ".$self->outdir."/ |grep ".$self->outdir."/".$self->fname.".annovar.$sample | grep \"multianno\" | xargs -i -t mv {} ".$self->outdir."/annovar_final\n\n";
686             }
687              
688             }
689              
690             =head2 vcf_annotate
691              
692             Generate the commands to annotate the vcf file using vcf-annotate
693              
694             =cut
695              
696             sub vcf_annotate{
697 0     0     my($self) = @_;
698              
699 0           $self->gen_descr;
700 0           $self->gen_annot;
701              
702              
703 0           $self->merge_vcfs;
704             }
705              
706             =head2 gen_descr
707              
708             Bgzip, tabix, all of vcftools, and sort must be in your PATH for these to work.
709              
710              
711             There are two parts to this command.
712              
713             The first prepares the annotation file.
714              
715             1. The annotation file is backed up just in case
716             2. The annotation file is sorted, because I had some problems with sorting
717             3. The annotation file is bgzipped, as required by vcf-annotate
718             4. The annotation file is tabix indexed using the special commands -s 1 -b 2 -e 3
719              
720             The second writes out the vcf-annotate commands
721              
722             Example with RefGene
723             zcat ../../variants.vcf.gz | vcf-annotate -a sorted.annotation.gz \
724             -d key=INFO,ID=SAMPLEID_Func_refGene,Number=0,Type=String,Description='SAMPLEID Annovar Func_refGene' \
725             -d key=INFO,ID=SAMPLEID_Gene_refGene,Number=0,Type=String,Description='SAMPLEID Annovar Gene_refGene' \
726             -d key=INFO,ID=SAMPLEID_ExonicFun_refGene,Number=0,Type=String,Description='SAMPLEID Annovar ExonicFun_refGene' \
727             -d key=INFO,ID=SAMPLEID_AAChange_refGene,Number=0,Type=String,Description='SAMPLEID Annovar AAChange_refGene' \
728             -c CHROM,FROM,TO,-,-,INFO/SAMPLEID_Func_refGene,INFO/SAMPLEID_Gene_refGene,INFO/SAMPLEID_ExonicFun_refGene,INFO/SAMPLEID_AAChange_refGene > SAMPLEID.annotated.vcf
729              
730             =cut
731              
732             sub gen_descr{
733 0     0     my($self) = @_;
734              
735 0           print "##Prepare to reannotate VCF files\n\n";
736              
737 0 0         make_path($self->outdir."/vcf-annotate_interim") if ! -d $self->outdir."/vcf-annotate_interim";
738 0 0         make_path($self->outdir."/vcf-annotate_final") if ! -d $self->outdir."/vcf-annotate_final";
739              
740 0           foreach my $sample (@{$self->samples->{$self->file}}){
  0            
741 0           print "cp ".$self->outdir."/annovar_final/".$self->fname.".annovar.$sample.hg19_multianno.txt \\\n";
742 0           print $self->outdir."/vcf-annotate_interim/".$self->fname.".annovar.$sample.hg19_multianno.txt \\\n";
743 0           print "&& sed -i 's/;/,/;s/=/->/;s/GERP++/GERPPP/;s/+/P/'g ".$self->outdir."/vcf-annotate_interim/".$self->fname.".annovar.$sample.hg19_multianno.txt \\\n";
744 0           print "&& sort -k1,1 -k2,2n ".$self->outdir."/vcf-annotate_interim/".$self->fname.".annovar.$sample.hg19_multianno.txt > ";
745 0           print $self->outdir."/vcf-annotate_interim/".$self->fname.".sorted.annovar.$sample.hg19_multianno.txt \\\n";
746 0           print "&& bgzip -f ".$self->outdir."/vcf-annotate_interim/".$self->fname.".sorted.annovar.$sample.hg19_multianno.txt \\\n";
747 0           print "&& tabix -s 1 -b 2 -e 3 ".$self->outdir."/vcf-annotate_interim/".$self->fname.".sorted.annovar.$sample.hg19_multianno.txt.gz \n\n";
748             }
749              
750             }
751              
752             =head2 gen_annot
753              
754             =cut
755              
756             sub gen_annot {
757 0     0     my $self = shift;
758              
759 0           print "##Reannotate VCF files\n\n";
760 0           foreach my $sample (@{$self->samples->{$self->file}}){
  0            
761 0 0         if($self->match_file(qr/gz$/)){
762 0           print "zcat ".$self->file." | ";
763             }
764             else{
765 0           print "cat ".$self->file." | ";
766             }
767 0           print "vcf-annotate -a ".$self->outdir."/vcf-annotate_interim/".$self->fname.".sorted.annovar.$sample.hg19_multianno.txt.gz \\\n";
768              
769 0           foreach my $db (@{$self->annovar_dbs}){
  0            
770 0 0         if(exists $self->annovar_cols->{$db}){
771 0           my $tmp = $self->annovar_cols->{$db};
772              
773             #Test this!!!
774 0           $db =~ s/\+/P/g;
775 0           $db =~ s/\W/_/g;
776              
777 0           foreach my $t (@$tmp){
778 0           $t =~ s/\+/P/g;
779 0           $t =~ s/\W/_/g;
780 0           print <<EOF;
781             -d key=INFO,ID=$sample.annovar.$db.$t,Number=0,Type=String,Description='Annovar $sample $db $t' \\
782             EOF
783             }
784             }
785             else{
786 0           print <<EOF;
787             -d key=INFO,ID=$sample.annovar.$db,Number=0,Type=String,Description='Annovar gen $sample $db' \\
788             EOF
789             }
790              
791             }
792              
793 0           $self->gen_cols($sample);
794             }
795              
796             }
797              
798             =head2 gen_cols
799              
800             Generate the -c portion of the vcf-annotate command
801              
802             =cut
803              
804             sub gen_cols{
805 0     0     my($self, $sample) = @_;
806              
807 0           my $cols = "-c CHROM,FROM,TO,-,-";
808              
809 0           foreach my $db (@{$self->annovar_dbs}){
  0            
810 0 0         if(exists $self->annovar_cols->{$db}){
811 0           my $tmp = $self->annovar_cols->{$db};
812              
813 0           foreach my $t (@$tmp){
814 0           $cols .= ",INFO/$sample.annovar.$db.$t";
815             }
816             }
817             else{
818 0           $cols .= ",INFO/$sample.annovar.$db";
819             }
820              
821             }
822              
823 0           print $cols." | bgzip -f -c > ".$self->outdir."/vcf-annotate_final/".$self->fname.".$sample.annovar.vcf.gz && \\\n";
824 0           print "tabix -p vcf ".$self->outdir."/vcf-annotate_final/".$self->fname.".$sample.annovar.vcf.gz\n\n";
825              
826             }
827              
828             =head2 merge_vcfs
829              
830             There is one vcf-annotated file per sample, so merge those at the the end to get a multisample file using vcf-merge
831              
832             =cut
833              
834             sub merge_vcfs {
835 0     0     my($self) = @_;
836              
837 0 0         return if scalar @{$self->samples->{$self->file}} == 1;
  0            
838              
839 0           print "##Merge single sample VCF files\n\n";
840              
841 0           print "vcf-merge \\\n";
842 0           foreach my $sample (@{$self->samples->{$self->file}}){
  0            
843 0           print $self->outdir."/vcf-annotate_final/".$self->fname.".$sample.annovar.vcf.gz \\\n";
844             }
845 0           print " | bgzip -f -c > ".$self->outdir."/vcf-annotate_interim/".$self->fname.".allsamples.annovar.vcf.gz \\\n";
846 0           print "&& tabix -p vcf ".$self->outdir."/vcf-annotate_interim/".$self->fname.".allsamples.annovar.vcf.gz\n";
847              
848 0           print "\nwait\n\n";
849              
850 0           $self->subset_vcfs();
851              
852             }
853              
854             =head2 subset_vcfs
855              
856             vcf-merge used in this fashion will create a lot of redundant columns, because it wants to assume all sample names are unique
857              
858             Straight from the vcftools documentation
859              
860             vcf-subset -c NA0001,NA0002 file.vcf.gz | bgzip -c > out.vcf.gz
861              
862             =cut
863              
864             sub subset_vcfs {
865 0     0     my($self) = @_;
866              
867 0           print "##Subsetting the files to get rid of redundant info\n\n";
868              
869 0           my $str = join(",", @{$self->orig_samples->{$self->file}});
  0            
870              
871 0           print "vcf-subset -c $str ".$self->outdir."/vcf-annotate_interim/".$self->fname.".allsamples.annovar.vcf.gz | bgzip -f -c > ".$self->outdir."/vcf-annotate_final/".$self->fname.".allsamples.nonredundant.annovar.vcf.gz \\\n";
872 0           print "&& tabix -p vcf ".$self->outdir."/vcf-annotate_final/".$self->fname.".allsamples.nonredundant.annovar.vcf.gz\n";
873              
874 0           print "## Finished processing file ".$self->file."\n\n";
875             }
876              
877              
878              
879             =head1 AUTHOR
880              
881             Jillian Rowe, C<< <jillian.e.rowe at gmail.com> >>
882              
883             =head1 BUGS
884              
885             Please report any bugs or feature requests to C<bug-annovar-wrapper at rt.cpan.org>, or through
886             the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Annovar-Wrapper>. I will be notified, and then you'll
887             automatically be notified of progress on your bug as I make changes.
888              
889             =head1 SUPPORT
890              
891             You can find documentation for this module with the perldoc command.
892              
893             perldoc Annovar::Wrapper
894              
895              
896             You can also look for information at:
897              
898             =over 4
899              
900             =item * RT: CPAN's request tracker (report bugs here)
901              
902             L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Annovar-Wrapper>
903              
904             =item * AnnoCPAN: Annotated CPAN documentation
905              
906             L<http://annocpan.org/dist/Annovar-Wrapper>
907              
908             =item * CPAN Ratings
909              
910             L<http://cpanratings.perl.org/d/Annovar-Wrapper>
911              
912             =item * Search CPAN
913              
914             L<http://search.cpan.org/dist/Annovar-Wrapper/>
915              
916             =back
917              
918              
919             =head1 ACKNOWLEDGEMENTS
920              
921             This module is a wrapper around the well developed annovar pipeline. The
922             commands come straight from the documentation.
923              
924             This module was originally developed at and for Weill Cornell Medical College
925             in Qatar within ITS Advanced Computing Team and scientific input from Khalid
926             Fahkro. With approval from WCMC-Q, this information was generalized and put on
927             github, for which the authors would like to express their gratitude.
928              
929             =head1 LICENSE AND COPYRIGHT
930              
931             Copyright 2014 Weill Cornell Medical College Qatar.
932              
933             This program is free software; you can redistribute it and/or modify it
934             under the terms of the the Artistic License (2.0). You may obtain a
935             copy of the full license at:
936              
937             L<http://www.perlfoundation.org/artistic_license_2_0>
938              
939             Any use, modification, and distribution of the Standard or Modified
940             Versions is governed by this Artistic License. By using, modifying or
941             distributing the Package, you accept this license. Do not use, modify,
942             or distribute the Package, if you do not accept this license.
943              
944             If your Modified Version has been derived from a Modified Version made
945             by someone other than you, you are nevertheless required to ensure that
946             your Modified Version complies with the requirements of this license.
947              
948             This license does not grant you the right to use any trademark, service
949             mark, tradename, or logo of the Copyright Holder.
950              
951             This license includes the non-exclusive, worldwide, free-of-charge
952             patent license to make, have made, use, offer to sell, sell, import and
953             otherwise transfer the Package with respect to any patent claims
954             licensable by the Copyright Holder that are necessarily infringed by the
955             Package. If you institute patent litigation (including a cross-claim or
956             counterclaim) against any party alleging that the Package constitutes
957             direct or contributory patent infringement, then this Artistic License
958             to you shall terminate on the date that such litigation is filed.
959              
960             Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER
961             AND CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES.
962             THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
963             PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY
964             YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR
965             CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR
966             CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE,
967             EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
968              
969              
970             =cut
971              
972             1; # End of Annovar::Wrapper