File Coverage

blib/lib/Bio/ViennaNGS/AnnoC.pm
Criterion Covered Total %
statement 7 9 77.7
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 10 12 83.3


line stmt bran cond sub pod time code
1             # -*-CPerl-*-
2             # Last changed Time-stamp: <2014-10-22 14:03:15 mtw>
3              
4             package Bio::ViennaNGS::AnnoC;
5              
6 1     1   71970 use 5.12.0;
  1         3  
  1         38  
7 1     1   8 use version; our $VERSION = qv('0.07');
  1         2  
  1         5  
8 1     1   282 use Bio::ViennaNGS qw(sortbed);
  0            
  0            
9             use Bio::Tools::GFF;
10             use IPC::Cmd qw(can_run run);
11             use Path::Class;
12             use Carp;
13             use Moose;
14             use namespace::autoclean;
15              
16             has 'accession' => (
17             is => 'rw',
18             isa => 'Str',
19             predicate => 'has_accession',
20             );
21              
22             has 'features' => (
23             is => 'ro',
24             isa => 'HashRef',
25             predicate => 'has_features',
26             default => sub { {} },
27             );
28              
29             has 'nr_features' => (
30             is => 'ro',
31             isa => 'Int',
32             builder => '_get_nr_of_features',
33             lazy => 1,
34             );
35              
36             has 'featstat' => (
37             is => 'ro',
38             isa => 'HashRef',
39             builder => '_set_featstat',
40             predicate => 'has_featstat',
41             lazy => 1,
42             );
43              
44             before 'featstat' => sub {
45             my $self = shift;
46             $self->_get_nr_of_features();
47             };
48              
49             sub _set_featstat {
50             my $self = shift;
51             my $this_function = (caller(0))[3];
52             my %fs = ();
53             confess "ERROR [$this_function] \$self->features not available"
54             unless ($self->has_features);
55             $fs{total} = 0;
56             $fs{origin} = "$this_function ".$VERSION;
57             $fs{count} = $self->nr_features;
58             foreach my $uid ( keys %{$self->features} ){
59             my $gbkey = ${$self->features}{$uid}->{gbkey};
60             $fs{total} += 1;
61             unless (exists $fs{$gbkey}){
62             $fs{$gbkey} = 0;
63             }
64             $fs{$gbkey} += 1;
65             }
66             return \%fs;
67             }
68              
69             sub _get_nr_of_features {
70             my $self = shift;
71             my $this_function = (caller(0))[3];
72             confess "ERROR [$this_function] \$self->features not available"
73             unless ($self->has_features);
74             return (keys %{$self->features});
75             }
76              
77             sub parse_gff {
78             my ($self,$in_file) = @_;
79             my ($i,$gffio,$header,$f,$gbkey);
80             my $this_function = (caller(0))[3];
81              
82             $gffio = Bio::Tools::GFF->new(-file => $in_file,
83             -gff_version => 3,
84             );
85             $gffio->ignore_sequence(1);
86             if ($header = $gffio->next_segment() ){
87             $self->accession( $header->display_id() );
88             }
89             else{ carp "ERROR [$this_function] Cannot parse GFF header\n" }
90              
91             while($f = $gffio->next_feature()) {
92             my ($uid,$feat_name);
93             my @name = my @id = my @gbkeys = ();
94              
95             next if ($f->primary_tag() eq "exon");
96              
97             # 1) determine gbkey of the current feature
98             @gbkeys = $f->get_tag_values("gbkey");
99             $gbkey = $gbkeys[0];
100              
101             # 2) get a unique ID for each feature
102             if ($f->has_tag('ID')){
103             @id = $f->get_tag_values('ID');
104             $uid = $id[0]; # ID=id101
105             }
106             else {
107             croak "ERROR [$this_function] Feature '$gbkey' at pos.\
108             $f->start does not have \'ID\' attribute\n";
109             }
110              
111             # 3) assign parent's unique ID in case a parent record exists
112             if ($f->has_tag('Parent')){
113             @id = $f->get_tag_values('Parent');
114             $uid = $id[0]; # ID=id101
115             }
116              
117             # 4) find a name for the current feature, use 'Name' or 'ID' attribute
118             if ($f->has_tag('Name')){
119             @name = $f->get_tag_values('Name');
120             $feat_name = $name[0];
121             }
122             elsif ($f->has_tag('ID')){
123             @id = $f->get_tag_values('ID');
124             $feat_name = $id[0]; # ID=id101, use ID as feature name
125             }
126             else {
127             croak "ERROR [$this_function] Cannot set name for feature \
128             $f->gbkey at pos. $f->start\n";
129             }
130              
131             unless (exists ${$self->features}{$uid}) { # gene / ribosome_entry_site / etc.
132             ${$self->features}{$uid}->{start} = $f->start;
133             ${$self->features}{$uid}->{end} = $f->end;
134             ${$self->features}{$uid}->{strand} = $f->strand;
135             ${$self->features}{$uid}->{length} = $f->length;
136             ${$self->features}{$uid}->{seqid} = $f->seq_id;
137             ${$self->features}{$uid}->{score} = $f->score || 0;
138             ${$self->features}{$uid}->{gbkey} = $gbkey;
139             ${$self->features}{$uid}->{name} = $feat_name;
140             ${$self->features}{$uid}->{uid} = $uid;
141             }
142             else { # CDS / tRNA / rRNA / etc.
143             ${$self->features}{$uid}->{gbkey} = $gbkey; # gbkey for tRNA/ rRNA/ CDS etc
144             }
145             }
146             $gffio->close();
147             }
148              
149             sub features2bed {
150             my ($self,$gbkey,$dest,$bn,$log) = @_;
151             my ($chrom,$chrom_start,$chrom_end,$name,$score,$strand,$thick_start);
152             my ($thick_end,$reserved,$block_count,$block_sizes,$block_starts);
153             my @ft = ();
154             my $this_function = (caller(0))[3];
155             my $bedtools = can_run('bedtools') or
156             croak "ERROR [$this_function] Cannot find 'bedtools' utility";
157              
158             croak "ERROR [$this_function] $self->features not available"
159             unless ($self->has_features);
160             croak "ERROT [$this_function] $self->featstat not available"
161             unless ($self->has_featstat);
162             croak "ERROR [$this_function] $dest does not exist"
163             unless (-d $dest);
164             if (defined $log){open(LOG, ">>", $log) or croak $!;}
165              
166             if (defined $gbkey){ # dump features of just one genbank key
167             confess "ERROR [$this_function] genbank key \'$gbkey\' N/A in hash "
168             unless (exists ${$self->featstat}{$gbkey});
169             $ft[0] = $gbkey;
170             }
171             else{ # dump features for all genbank keys
172             foreach my $gbk (keys %{$self->featstat}) {
173             next if ($gbk eq 'total' || $gbk eq 'Src' || $gbk eq 'accession' ||
174             $gbk eq 'origin' || $gbk eq 'count');
175             push @ft,$gbk;
176             }
177             }
178              
179             foreach my $f (@ft){
180             my $bedname = file($dest,"$bn.$f.bed");
181             my $bedname_u = file($dest,"$bn.$f.u.bed");
182             open (BEDOUT, "> $bedname_u") or croak $!;
183              
184             # dump unsorted gene annotation from DS to BED12
185             foreach my $uid (keys %{$self->features}){
186             next unless (${$self->features}{$uid}->{gbkey} eq $f);
187             my @bedline = ();
188             $chrom = ${$self->features}{$uid}->{seqid};
189             $chrom_start = ${$self->features}{$uid}->{start};
190             $chrom_start--; # BED is 0-based
191             $chrom_end = ${$self->features}{$uid}->{end};
192             $name = ${$self->features}{$uid}->{name};
193             $score = ${$self->features}{$uid}->{score};
194             $strand = ${$self->features}{$uid}->{strand} == -1 ? '-' : '+'; #default to +
195             $thick_start = $chrom_start;
196             $thick_end = $chrom_end;
197             $reserved = 0;
198             $block_count = 1;
199             $block_sizes = ${$self->features}{$uid}->{length}.",";
200             $block_starts = "0,";
201             @bedline = join ("\t", ($chrom,$chrom_start,$chrom_end,
202             $name,$score,$strand,$thick_start,
203             $thick_end,$reserved,$block_count,
204             $block_sizes, $block_starts));
205             print BEDOUT "@bedline\n";
206             }
207             close (BEDOUT);
208              
209             sortbed($bedname_u,".",$bedname,1,undef); # sort bed file
210              
211             } # end foreach
212             if (defined $log){close(LOG)};
213             }
214              
215             sub feature_summary {
216             my ($self, $dest) = @_;
217             my ($fn,$fh);
218             my $this_function = (caller(0))[3];
219              
220             croak "ERROR [$this_function] $dest does not exist\n"
221             unless (-d $dest);
222             croak "ERROR [$this_function] $self->accession not available\n"
223             unless ($self->has_accession);
224              
225             $fn = dir($dest,$self->accession.".summary.txt");
226             open $fh, ">", $fn or croak $!;
227              
228             print $fh "Accession\t ".$self->accession."\n";
229             print $fh "Origin \t ${$self->featstat}{origin}\n";
230             foreach my $ft (sort keys %{$self->featstat}){
231             next if ($ft =~ /total/ || $ft =~ /accession/ || $ft =~ /origin/);
232             print $fh "$ft\t${$self->featstat}{$ft}\n";
233             }
234             print $fh "Total\t${$self->featstat}{total}\n";
235             close $fh;
236             }
237              
238             __PACKAGE__->meta->make_immutable;
239              
240             1;
241              
242             __END__