File Coverage

blib/lib/Bio/ViennaNGS/AnnoC.pm
Criterion Covered Total %
statement 7 9 77.7
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 10 12 83.3


line stmt bran cond sub pod time code
1             # -*-CPerl-*-
2             # Last changed Time-stamp: <2014-10-24 10:31:25 mtw>
3              
4             package Bio::ViennaNGS::AnnoC;
5              
6 1     1   15409 use 5.12.0;
  1         4  
  1         32  
7 1     1   408 use version; our $VERSION = qv('0.08');
  1         1342  
  1         5  
8 1     1   253 use Bio::ViennaNGS qw(sortbed);
  0            
  0            
9             use Bio::Tools::GFF;
10             use Path::Class;
11             use Carp;
12             use Moose;
13             use namespace::autoclean;
14              
15             has 'accession' => (
16             is => 'rw',
17             isa => 'Str',
18             predicate => 'has_accession',
19             );
20              
21             has 'features' => (
22             is => 'ro',
23             isa => 'HashRef',
24             predicate => 'has_features',
25             default => sub { {} },
26             );
27              
28             has 'nr_features' => (
29             is => 'ro',
30             isa => 'Int',
31             builder => '_get_nr_of_features',
32             lazy => 1,
33             );
34              
35             has 'featstat' => (
36             is => 'ro',
37             isa => 'HashRef',
38             builder => '_set_featstat',
39             predicate => 'has_featstat',
40             lazy => 1,
41             );
42              
43             before 'featstat' => sub {
44             my $self = shift;
45             $self->_get_nr_of_features();
46             };
47              
48             sub _set_featstat {
49             my $self = shift;
50             my $this_function = (caller(0))[3];
51             my %fs = ();
52             confess "ERROR [$this_function] \$self->features not available"
53             unless ($self->has_features);
54             $fs{total} = 0;
55             $fs{origin} = "$this_function ".$VERSION;
56             $fs{count} = $self->nr_features;
57             foreach my $uid ( keys %{$self->features} ){
58             my $gbkey = ${$self->features}{$uid}->{gbkey};
59             $fs{total} += 1;
60             unless (exists $fs{$gbkey}){
61             $fs{$gbkey} = 0;
62             }
63             $fs{$gbkey} += 1;
64             }
65             return \%fs;
66             }
67              
68             sub _get_nr_of_features {
69             my $self = shift;
70             my $this_function = (caller(0))[3];
71             confess "ERROR [$this_function] \$self->features not available"
72             unless ($self->has_features);
73             return (keys %{$self->features});
74             }
75              
76             sub parse_gff {
77             my ($self,$in_file) = @_;
78             my ($i,$gffio,$header,$f,$gbkey);
79             my $this_function = (caller(0))[3];
80              
81             $gffio = Bio::Tools::GFF->new(-file => $in_file,
82             -gff_version => 3,
83             );
84             $gffio->ignore_sequence(1);
85             if ($header = $gffio->next_segment() ){
86             $self->accession( $header->display_id() );
87             }
88             else{ carp "ERROR [$this_function] Cannot parse GFF header\n" }
89              
90             while($f = $gffio->next_feature()) {
91             my ($uid,$feat_name);
92             my @name = my @id = my @gbkeys = ();
93              
94             next if ($f->primary_tag() eq "exon");
95              
96             # 1) determine gbkey of the current feature
97             @gbkeys = $f->get_tag_values("gbkey");
98             $gbkey = $gbkeys[0];
99              
100             # 2) get a unique ID for each feature
101             if ($f->has_tag('ID')){
102             @id = $f->get_tag_values('ID');
103             $uid = $id[0]; # ID=id101
104             }
105             else {
106             croak "ERROR [$this_function] Feature '$gbkey' at pos.\
107             $f->start does not have \'ID\' attribute\n";
108             }
109              
110             # 3) assign parent's unique ID in case a parent record exists
111             if ($f->has_tag('Parent')){
112             @id = $f->get_tag_values('Parent');
113             $uid = $id[0]; # ID=id101
114             }
115              
116             # 4) find a name for the current feature, use 'Name' or 'ID' attribute
117             if ($f->has_tag('Name')){
118             @name = $f->get_tag_values('Name');
119             $feat_name = $name[0];
120             }
121             elsif ($f->has_tag('ID')){
122             @id = $f->get_tag_values('ID');
123             $feat_name = $id[0]; # ID=id101, use ID as feature name
124             }
125             else {
126             croak "ERROR [$this_function] Cannot set name for feature \
127             $f->gbkey at pos. $f->start\n";
128             }
129              
130             unless (exists ${$self->features}{$uid}) { # gene / ribosome_entry_site / etc.
131             ${$self->features}{$uid}->{start} = $f->start;
132             ${$self->features}{$uid}->{end} = $f->end;
133             ${$self->features}{$uid}->{strand} = $f->strand;
134             ${$self->features}{$uid}->{length} = $f->length;
135             ${$self->features}{$uid}->{seqid} = $f->seq_id;
136             ${$self->features}{$uid}->{score} = $f->score || 0;
137             ${$self->features}{$uid}->{gbkey} = $gbkey;
138             ${$self->features}{$uid}->{name} = $feat_name;
139             ${$self->features}{$uid}->{uid} = $uid;
140             }
141             else { # CDS / tRNA / rRNA / etc.
142             ${$self->features}{$uid}->{gbkey} = $gbkey; # gbkey for tRNA/ rRNA/ CDS etc
143             }
144             }
145             $gffio->close();
146             }
147              
148             sub features2bed {
149             my ($self,$gbkey,$dest,$bn,$log) = @_;
150             my ($chrom,$chrom_start,$chrom_end,$name,$score,$strand,$thick_start);
151             my ($thick_end,$reserved,$block_count,$block_sizes,$block_starts);
152             my @ft = ();
153             my $this_function = (caller(0))[3];
154              
155             croak "ERROR [$this_function] $self->features not available"
156             unless ($self->has_features);
157             croak "ERROT [$this_function] $self->featstat not available"
158             unless ($self->has_featstat);
159             croak "ERROR [$this_function] $dest does not exist"
160             unless (-d $dest);
161             if (defined $log){open(LOG, ">>", $log) or croak $!;}
162              
163             if (defined $gbkey){ # dump features of just one genbank key
164             confess "ERROR [$this_function] genbank key \'$gbkey\' N/A in hash "
165             unless (exists ${$self->featstat}{$gbkey});
166             $ft[0] = $gbkey;
167             }
168             else{ # dump features for all genbank keys
169             foreach my $gbk (keys %{$self->featstat}) {
170             next if ($gbk eq 'total' || $gbk eq 'Src' || $gbk eq 'accession' ||
171             $gbk eq 'origin' || $gbk eq 'count');
172             push @ft,$gbk;
173             }
174             }
175              
176             foreach my $f (@ft){
177             my $bedname = file($dest,"$bn.$f.bed");
178             my $bedname_u = file($dest,"$bn.$f.u.bed");
179             open (BEDOUT, "> $bedname_u") or croak $!;
180              
181             # dump unsorted gene annotation from DS to BED12
182             foreach my $uid (keys %{$self->features}){
183             next unless (${$self->features}{$uid}->{gbkey} eq $f);
184             my @bedline = ();
185             $chrom = ${$self->features}{$uid}->{seqid};
186             $chrom_start = ${$self->features}{$uid}->{start};
187             $chrom_start--; # BED is 0-based
188             $chrom_end = ${$self->features}{$uid}->{end};
189             $name = ${$self->features}{$uid}->{name};
190             $score = ${$self->features}{$uid}->{score};
191             $strand = ${$self->features}{$uid}->{strand} == -1 ? '-' : '+'; #default to +
192             $thick_start = $chrom_start;
193             $thick_end = $chrom_end;
194             $reserved = 0;
195             $block_count = 1;
196             $block_sizes = ${$self->features}{$uid}->{length}.",";
197             $block_starts = "0,";
198             @bedline = join ("\t", ($chrom,$chrom_start,$chrom_end,
199             $name,$score,$strand,$thick_start,
200             $thick_end,$reserved,$block_count,
201             $block_sizes, $block_starts));
202             print BEDOUT "@bedline\n";
203             }
204             close (BEDOUT);
205              
206             sortbed($bedname_u,".",$bedname,1,undef); # sort bed file
207              
208             } # end foreach
209             if (defined $log){close(LOG)};
210             }
211              
212             sub feature_summary {
213             my ($self, $dest) = @_;
214             my ($fn,$fh);
215             my $this_function = (caller(0))[3];
216              
217             croak "ERROR [$this_function] $dest does not exist\n"
218             unless (-d $dest);
219             croak "ERROR [$this_function] $self->accession not available\n"
220             unless ($self->has_accession);
221              
222             $fn = dir($dest,$self->accession.".summary.txt");
223             open $fh, ">", $fn or croak $!;
224              
225             print $fh "Accession\t ".$self->accession."\n";
226             print $fh "Origin \t ${$self->featstat}{origin}\n";
227             foreach my $ft (sort keys %{$self->featstat}){
228             next if ($ft =~ /total/ || $ft =~ /accession/ || $ft =~ /origin/);
229             print $fh "$ft\t${$self->featstat}{$ft}\n";
230             }
231             print $fh "Total\t${$self->featstat}{total}\n";
232             close $fh;
233             }
234              
235             __PACKAGE__->meta->make_immutable;
236              
237             1;
238              
239             __END__