File Coverage

blib/lib/Bio/ViennaNGS.pm
Criterion Covered Total %
statement 3 3 100.0
branch n/a
condition n/a
subroutine 1 1 100.0
pod n/a
total 4 4 100.0


line stmt bran cond sub pod time code
1             # -*-CPerl-*-
2             # Last changed Time-stamp: <2015-01-08 23:54:55 mtw>
3              
4             package Bio::ViennaNGS;
5              
6 1     1   8 use version; our $VERSION = qv('0.12_09');
  1         1  
  1         5  
7              
8             1;
9              
10             =head1 NAME
11              
12             Bio::ViennaNGS - A Perl distribution for Next-Generation Sequencing
13             (NGS) data analysis
14              
15             =head1 DESCRIPTION
16              
17             Bio::ViennaNGS is a distribution of Perl modules and utilities for building
18             efficient Next-Generation Sequencing (NGS) analysis pipelines. It covers
19             various aspects of NGS data analysis, including (but not limited to)
20             conversion of sequence annotation, evaluation of mapped data, expression
21             quantification and visualization.
22              
23             The main Bio::ViennaNGS module is shipped with a complementary set of
24             (sub)modules:
25              
26             =over
27              
28             =item L<Bio::ViennaNGS::AnnoC>: A Moose interface for storage and
29             conversion of sequence annotation data.
30              
31             =item L<Bio::ViennaNGS::BamStat>: A L<Moose> based class for
32             collecting mapping statistics.
33              
34             =item L<Bio::ViennaNGS::BamStatSummary>: A L<Moose> interface for
35             processing L<Bio::ViennaNGS::BamStat> objects on a set of BAM files.
36              
37             =item L<Bio::ViennaNGS::Fasta>: Routines for accessing genomic
38             sequences implemented through a L<Moose> interface to
39             L<Bio::DB::Fasta>.
40              
41             =item L<Bio::ViennaNGS::Feature>: A L<Moose> based BED6 wrapper.
42              
43             =item L<Bio::ViennaNGS::FeatureChain>: Yet another L<Moose> class for
44             chaining gene annotation features.
45              
46             =item L<Bio::ViennaNGS::MinimalFeature>: A L<Moose> interface for
47             handling elementary gene annotation.
48              
49             =item L<Bio::ViennaNGS::SpliceJunc>: A collection of routines for
50             alternative splicing analysis.
51              
52             =item L<Bio::ViennaNGS::Tutorial>: A comprehensive tutorial of the
53             L<Bio::ViennaNGS> core routines with real-world NGS data.
54              
55             =item L<Bio::ViennaNGS::UCSC>: Routines for visualization of genomics
56             data with the UCSC genome browser.
57              
58             =item L<Bio::ViennaNGS::Util>: A collection of wrapper routines for
59             commonly used third-party NGS utilities, code for normalization of
60             gene expression values based on read count data and a set of utility
61             functions.
62              
63             =back
64              
65             =head1 UTILITIES
66              
67             L<Bio::ViennaNGS> comes with a collection of command line utilities
68             for accomplishing routine tasks often required in NGS data
69             processing. These utilities serve as reference implementations of the
70             routines implemented throughout the modules and can readily be used
71             for atomic tasks in NGS data processing:
72              
73             =over
74              
75             =item F<assembly_hub_constructor.pl>: The UCSC genome browser offers
76             the possibility to visualize any organism (including organisms that
77             are not included in the standard UCSC browser bundle) through so
78             called 'Assembly Hubs'. This script constructs Assembly Hubs from
79             genomic sequence and annotation data.
80              
81             =item F<bam_split.pl>: Split (paired-end and single-end) BAM alignment
82             files by strand and compute statistics. Optionally create BED output,
83             as well as normalized bedGraph and bigWig files for coverage
84             visualization in genome browsers (see dependencies on third-party
85             tools below).
86              
87             =item F<bam_to_bigWig.pl>: Produce bigWig coverage profiles from
88             (aligned) BAM files, explicitly considering strandedness. The most
89             natural use case of this tool is to create strand-aware coverage
90             profiles in bigWig format for genome browser visualization.
91              
92             =item F<bam_uniq.pl>: Extract unique and multi mapping reads from BAM
93             alignment files and create a separate BAM file for both unique (.uniq.)
94             and multi (.mult.) mappers.
95              
96             =item F<bed2bedGraph.pl>: Convert BED files to (strand specific)
97             bedGraph files, allowing additional annotation and automatic
98             generation of bedGraph files which can easily be converted to big-type
99             files for easy UCSC visualization.
100              
101             =item F<extend_bed.pl>: Extend genomic features in BED files by a
102             certain number of nucleotides, either on both sides or specifically at
103             the 5' or 3' end, respectively.
104              
105             =item F<gff2bed.pl>: Convert RefSeq GFF3 annotation files to BED12
106             format. Individual BED12 files are created for each feature type
107             (CDS/tRNA/rRNA/etc.). Tested with RefSeq bacterial GFF3 annotation.
108              
109             =item F<kmer_analysis.pl>: Count k-mers of predefined length in FastQ
110             and Fasta files.
111              
112             =item F<MEME_XML_motif_extractor.pl>: Compute simple statistics from
113             MEME XML output and return a list of found motifs with the number of
114             sequences containing those motifs as well as nice ggplot graphs.
115              
116             =item F<motiffinda.pl>: Find motifs in annotated sequence
117             features. The motif can be provided as regular expression.
118              
119             =item F<newUCSCdb.pl>: Create a new genome database (i.e. add a novel
120             organism) in a local instance of the UCSC genome browser.
121              
122             =item F<normalize_multicov.pl>: Compute normalized expression data in
123             TPM/RPKM from (raw) read counts in bedtools multicov format. TPM
124             reference: Wagner et al, Theory Biosci. 131(4), pp 281-85 (2012)
125              
126             =item F<sj_visualizer.pl>: Convert splice junctions from mapped
127             RNA-seq data in segemehl BED6 splice junction format to BED12 for easy
128             visualization in genome browsers.
129              
130             =item F<splice_site_summary.pl>: Identify and characterize splice
131             junctions from RNA-seq data by intersecting them with annotated splice
132             junctions.
133              
134             =item F<newUCSCdb.pl>: Create a new genome database for a locally
135             installed instance of the UCSC genome browser. Based on
136             http://genomewiki.ucsc.edu/index.php/Building_a_new_genome_database
137              
138             =item F<trim_fastq.pl>: Trim sequence and quality string fields in a
139             Fastq file by user defined length.
140              
141             =back
142              
143             =head1 DEPENDENCIES
144              
145             The L<Bio::ViennaNGS> modules and classes depend on a set of Perl
146             modules, some of which are part of the Perl core distribution:
147              
148             =over 16
149              
150             =item L<Bio::Perl> >= 1.00690001
151              
152             =item L<Bio::DB::Sam> >= 1.39
153              
154             =item L<Bio::DB::Fasta>
155              
156             =item L<Bio::Tools::GFF>
157              
158             =item L<File::Basename>
159              
160             =item L<File::Share>
161              
162             =item L<File::Temp>
163              
164             =item L<Path::Class>
165              
166             =item L<IPC::Cmd>
167              
168             =item L<Carp>
169              
170             =item L<Template>
171              
172             =item L<Moose>
173              
174             =item L<Moose::Util::TypeConstraints>
175              
176             =item L<namespace::autoclean>
177              
178             =item L<MooseX::Clone>
179              
180             =item L<Tie::Hash::Indexed>
181              
182             =back
183              
184             In addition the following modules are required by the L<Bio::ViennaNGS> utilities:
185              
186             =over 4
187              
188             =item L<PerlIO::gzip>
189              
190             =item L<Math::Round>
191              
192             =item L<XML::Simple>
193              
194             =item L<Statistics::R>
195              
196             =back
197              
198             L<Bio::ViennaNGS> uses third-party tools for computing intersections
199             of BED files: F<bedtools intersect> from the
200             L<BEDtools|http://bedtools.readthedocs.org/en/latest/content/tools/intersect.html>
201             suite is used to compute overlaps and F<bedtools sort> is used to sort
202             BED output files. Make sure that those third-party utilities are
203             available on your system, and that they can be found and executed by
204             the Perl interpreter. We recommend installing the latest version of
205             L<BEDtools|https://github.com/arq5x/bedtools2> on your system.
206              
207             =head1 SOURCE AVAILABILITY
208              
209             Source code for this distribution is available from the L<ViennaNGS
210             Github repository|https://github.com/mtw/Bio-ViennaNGS>.
211              
212             =head1 PAPERS
213              
214             If the L<Bio::ViennaNGS> suite is useful for your work or if you use
215             any component of the distribution in a custom pipeline, please cite
216             the following publication:
217              
218             B<"ViennaNGS - A toolbox for building efficient next-generation sequencing
219             analysis pipelines">
220              
221             I<Michael T. Wolfinger, Joerg Fallmann, Florian Eggenhofer and Fabian Amman>
222              
223             bioRxiv L<doi:10.1101/013011|http://dx.doi.org/10.1101/013011>.
224              
225             =head1 NOTES
226              
227             The L<Bio::ViennaNGS> suite is actively developed and tested on
228             different flavours of Linux and Mac OS X. We have taken care of
229             writing platform-independent code that should run out of the box on
230             most UNIX-based systems, however we do not have access to machines
231             running Microsoft Windows. As such, we have not tested and will not
232             test Windows compatibility.
233              
234             =head1 SEE ALSO
235              
236             =over 13
237              
238             =item L<Bio::ViennaNGS::AnnoC>
239              
240             =item L<Bio::ViennaNGS::Bam>
241              
242             =item L<Bio::ViennaNGS::BamStat>
243              
244             =item L<Bio::ViennaNGS::BamStatSummary>
245              
246             =item L<Bio::ViennaNGS::Fasta>
247              
248             =item L<Bio::ViennaNGS::Feature>
249              
250             =item L<Bio::ViennaNGS::FeatureChain>
251              
252             =item L<Bio::ViennaNGS::MinimalFeature>
253              
254             =item L<Bio::ViennaNGS::SpliceJunc>
255              
256             =item L<Bio::ViennaNGS::Util>
257              
258             =item L<Bio::ViennaNGS::Tutorial>
259              
260             =item L<Bio::ViennaNGS::UCSC>
261              
262             =item L<Bio::ViennaNGS::Util>
263              
264             =back
265              
266             =head1 AUTHORS
267              
268             =over
269              
270             =item Michael T. Wolfinger E<lt>michael@wolfinger.euE<gt>
271              
272             =item Jörg Fallmann E<lt>fall@tbi.univie.ac.atE<gt>
273              
274             =item Florian Eggenhofer E<lt>florian.eggenhofer@tbi.univie.ac.atE<gt>
275              
276             =item Fabian Amman E<lt>fabian@tbi.univie.ac.atE<gt>
277              
278             =back
279              
280             =head1 COPYRIGHT AND LICENSE
281              
282             Copyright (C) 2014 Michael T. Wolfinger E<lt>michael@wolfinger.euE<gt>
283              
284             This library is free software; you can redistribute it and/or modify
285             it under the same terms as Perl itself, either Perl version 5.12.4 or,
286             at your option, any later version of Perl 5 you may have available.
287              
288             This software is distributed in the hope that it will be useful, but
289             WITHOUT ANY WARRANTY; without even the implied warranty of
290             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
291              
292             =cut
293              
294