File Coverage

blib/lib/Bio/Palantir.pm
Criterion Covered Total %
statement 15 15 100.0
branch n/a
condition n/a
subroutine 5 5 100.0
pod n/a
total 20 20 100.0


line stmt bran cond sub pod time code
1             package Bio::Palantir;
2             # ABSTRACT: core classes and utilities for Bio::Palantir
3             # CONTRIBUTOR: Denis BAURAIN <denis.baurain@uliege.be>
4             $Bio::Palantir::VERSION = '0.200700';
5 1     1   216779 use strict; use warnings;
  1     1   10  
  1         23  
  1         4  
  1         2  
  1         20  
6              
7 1     1   695 use Bio::Palantir::Parser; use Bio::Palantir::Refiner; use
  1     1   3  
  1         30  
  1         444  
  1         4  
  1         49  
8 1     1   460 Bio::Palantir::Explorer;
  1         4  
  1         56  
9              
10             1;
11              
12             __END__
13              
14             =pod
15              
16             =head1 NAME
17              
18             Bio::Palantir - core classes and utilities for Bio::Palantir
19              
20             =head1 VERSION
21              
22             version 0.200700
23              
24             =head1 SYNOPSIS
25              
26             use Bio::Palantir;
27              
28             # open and parse biosynML.xml or regions.js antiSMASH report
29             my $infile = 'biosynML.xml';
30             my $report = Bio::Palantir::Parser->new( file => $infile );
31              
32             # get main container
33             my $root = $report->root;
34              
35             # explore Biosynthetic Gene Clusters (BGCs) content
36            
37             # Bio::Palantir::Parser
38             for my $cluster ($root->all_clusters) { # returns all clusters
39             say $cluster->type; # returns the cluster type (e.g., nrps)
40            
41             for my $gene ($cluster->all_genes) { # returns all genes
42             say $gene->name; say $gene->genomic_coordinates; # for instance, returns the gene name, then the DNA gene coordinates (relative to the genome)
43             say $gene->coordinates; # returns protein gene coordinates (also relative to the genome)
44             say $gene->protein_sequence; # returns the gene protein sequence
45            
46             # if the BGC possesses domains (i.e., NRPS/PKS)
47             for my $domain ($gene->all_domains) { # returns all domains
48            
49             say $domain->rank; # for instance, returns the rank of the domain in the gene
50             say $domain->function; # returns the domain function (e.g., condensation)
51             say join '-', $domain->coordinates; # returns the coordinates (which are relative to the gene ones)
52             say $domain->protein_sequence; # returns the domain protein sequence
53              
54             # lowest level is Motifs (for antiSMASH 3 and 4)
55             for my $motif ($domain->all_motifs) {
56             #...
57             }
58             } } # closes the domain loop, then the gene loop
59              
60             # same way for looping into Module objects
61             for my $module ($cluster->all_modules) {
62             # ...
63             }
64             }
65              
66              
67             # Bio::Palantir::Refiner
68             use aliased 'Bio::Palantir::Refiner';
69             use aliased 'Bio::Palantir::Refiner::ClusterPlus';
70            
71             # it is possible to create Bio::Palantir::Refiner objects from already existing Bio::Palantir::Parser ones
72             my @cluster_plus;
73            
74             for my $cluster ($root->all_clusters) {
75             push @cluster_plus, ClusterPlus->new( _cluster => $cluster );
76             }
77              
78             # but if you intend to use the Refiner part, it is more convenient to create the Refiner object directly from a file
79             my $report = Refiner->new( file => 'biosynML.xml' );
80              
81             for my $cluster_plus ($report->all_clusters) {
82            
83             say $cluster_plus->type;
84              
85             for my $gene_plus ($cluster_plus->all_genes) {
86              
87             say $gene_plus->name;
88              
89             for my $domain_plus ($gene_plus->all_domains) {
90            
91             say 'Palantir version:';
92             say $domain_plus->function;
93             say $domain_plus->coordinates;
94             say $domain_plus->evalue;
95            
96             # compare with antiSMASH results
97             say 'antiSMASH version:'; say $domain_plus->_domain->function;
98             say $domain_plus->_domain->coordinates;
99             # say $domain_plus->evalue; # only available for Palantir part
100              
101             }
102              
103             }
104              
105             }
106              
107              
108             # Bio::Palantir::Explorer
109             use aliased 'Bio::Palantir::Explorer::ClusterFasta';
110            
111             # from a Bio::Palantir::Refiner object
112             for my $cluster_plus ($report->all_clusters) {
113            
114             for my $gene_plus ($cluster_plus->all_genes) {
115              
116             for my $domain_exp ($gene_plus->all_exp_domains) {
117              
118             say $domain_exp->function;
119             say $domain_exp->coordinates;
120             say $domain_exp->evalue;
121              
122             }
123              
124             }
125              
126             }
127              
128             # from a FASTA file (containing ONLY one BGC, each sequence being interpreted as a gene from the cluster)
129             my $cluster_exp = ClusterFasta->new( fasta => 'nrps_bgc.fasta' );
130              
131             for my $gene_exp ($cluster_exp->all_genes) {
132              
133             for my $domain_exp ($gene_exp->all_domains) {
134            
135             say $domain_exp->function;
136             say $domain_exp->coordinates;
137             say $domain_exp->evalue;
138              
139             }
140              
141             }
142              
143             =head1 DESCRIPTION
144              
145             This distribution is the base of the C<Bio::Palantir> module collection designed
146             as a toolbox for handling the post-processing of antiSMASH report data
147             (L<https://antismash.secondarymetabolites.org>) and improving some aspects of
148             its annotation of NRPS/PKS Biosynthetic Gene Clusters (BGCs), with the aim of
149             supporting small- and large-scale genome mining projects.
150              
151             The B<Palantir libraries> are organized as follows:
152              
153             C<Bio::Palantir::Parser> contains classes for hierarchically storing the
154             information of antiSMASH gene clusters.
155              
156             C<Bio::Palantir::Refiner> consists of classes (parallel to those of
157             C<Bio::Palantir::Parser>) dedicated to improving the annotation of NRPS/PKS
158             gene clusters.
159              
160             C<Bio::Palantir::Explorer> contains classes (also parallel to Parser) giving
161             access to an exploratory version of detected domains.
162              
163             More information on their internal structure can be found in their respective
164             files.
165              
166             Here is the list of functionalities offered by Palantir libraries and bins:
167              
168             Refinement of NRPS/PKS BGC annotations
169              
170             - B<Dynamic elongation of the coordinates of core domains>: enrich the
171             information contained in the sequences (application examples: improved
172             similarity searches and evolutionary approaches)
173              
174             - B<Filling the gaps in BGC annotation>: retrieve missed domains from exceptions
175             in the rules detection (application example: resolution of ambiguous or
176             incoherent BGC annotation)
177              
178             - B<Module delimitation>: apply biological rules to group domains in modules
179             (application example: analyses at module scale)
180              
181             - B<BGC visualization>: visualize and compare antiSMASH and Palantir annotations
182             [bin/draw_clusters.pl]
183              
184             - B<Exploratory mode visualization>: visualize and design the domain
185             architecture consensus from a raw view of all detected signatures (application
186             example: manual curation of the domain architecture consensus)
187              
188             BGC data manipulation
189              
190             - B<Generation of PDF/Word reports>: export customizable reports of refined BGC
191             data (application example: manual reading of numerous (filtered) BGC data)
192              
193             - B<Extraction of sequences>: export Fasta files from BGC data at different
194             scales: cluster, gene, module, domain (application example: data formatting for
195             downstream analyses)
196              
197             - B<Generation of SQL tables>: export SQL tables containing BGC data details
198             (application example: large-scale queries and statistics)
199              
200             =head1 AUTHOR
201              
202             Loic MEUNIER <lmeunier@uliege.be>
203              
204             =head1 CONTRIBUTOR
205              
206             =for stopwords Denis BAURAIN
207              
208             Denis BAURAIN <denis.baurain@uliege.be>
209              
210             =head1 COPYRIGHT AND LICENSE
211              
212             This software is copyright (c) 2019 by University of Liege / Unit of Eukaryotic Phylogenomics / Loic MEUNIER and Denis BAURAIN.
213              
214             This is free software; you can redistribute it and/or modify it under
215             the same terms as the Perl 5 programming language system itself.
216              
217             =cut