File Coverage

blib/lib/Bio/FastParsers/Hmmer/Standard/Domain.pm
Criterion Covered Total %
statement 18 20 90.0
branch n/a
condition n/a
subroutine 6 8 75.0
pod 0 5 0.0
total 24 33 72.7


line stmt bran cond sub pod time code
1             package Bio::FastParsers::Hmmer::Standard::Domain;
2             # ABSTRACT: Internal class for standard HMMER parser
3             # CONTRIBUTOR: Arnaud DI FRANCO <arnaud.difranco@gmail.com>
4             $Bio::FastParsers::Hmmer::Standard::Domain::VERSION = '0.213510';
5 7     7   5300 use Moose;
  7         37  
  7         51  
6 7     7   49994 use namespace::autoclean;
  7         22  
  7         62  
7              
8 7     7   682 use List::AllUtils qw(mesh);
  7         21  
  7         6556  
9              
10              
11             # public attributes
12              
13             has $_ => (
14             is => 'ro',
15             isa => 'Str',
16             required => 1,
17             ) for qw(seq scoreseq profile probabilities);
18              
19             with 'Bio::FastParsers::Roles::Domainable';
20              
21             around BUILDARGS => sub {
22             my ($orig, $class, $inargs) = @_;
23              
24             my @raw = @{ $inargs->{raw} };
25             my $summary = $inargs->{summary};
26              
27             my %outargs;
28              
29             # parse header
30             my @header_vals = $raw[0] =~ m/([\d\.]+)/xmsg;
31             $outargs{'rank'} = $header_vals[0];
32             $outargs{'dom_score'} = $header_vals[1];
33             $outargs{'c_evalue'}
34             = @header_vals == 3 ? $header_vals[2] : join 'e-', @header_vals[2,3]
35             ;
36              
37             # coerce numeric fields to numbers
38             %outargs = map { $_ => 0 + $outargs{$_} } keys %outargs;
39              
40             # parse domain alignment
41              
42             # Alignment is made of 4 lines: best match to profile, scoring
43             # correspondance, sequence alignment and posterior predictive. Each line
44             # is shifted to the right by the same amount of characters, which is
45             # different for each target. To get the size of the shift, I insert
46             # special characters on the seqline and split on hit. Each part gives the
47             # right length to extract correctly the information.
48              
49             my $profileline = $raw[1];
50             my $scoreline = $raw[2];
51             my $probline = $raw[4];
52             ( my $seqline = $raw[3] )
53             =~ s{(^\s+.*\s+\d+\s+)(\S+)\s\d+\s*$}{$1\|\|\|$2}xms;
54             chomp $seqline;
55             my ($skip, $tmpseq) = split /\|{3}/xms, $seqline;
56             my $scoreseq = substr $scoreline, length $skip, length $tmpseq;
57             my $profileseq = substr $profileline, length $skip, length $tmpseq;
58             my $probabilities = substr $probline, length $skip, length $tmpseq;
59             $outargs{'seq'} = $tmpseq;
60             $outargs{'scoreseq'} = $scoreseq;
61             $outargs{'profile'} = $profileseq;
62             $outargs{'probabilities'} = $probabilities;
63              
64             # attributes from summary domtbl
65             my @summary_attrs = qw(
66             dom_bias i_evalue
67             hmm_from hmm_to
68             ali_from ali_to
69             env_from env_to
70             acc
71             );
72             my @summary_slots = qw(4 6 7 8 10 11 13 14 16);
73              
74             # parse summary
75             # and coerce numeric fields to numbers
76             my @fields = split /\s+/xms, $summary;
77             my @summary_vals = map { 0 + $fields[$_] } @summary_slots;
78             my %summary_hash = mesh @summary_attrs, @summary_vals;
79              
80             # return expected constructor hash
81             return $class->$orig( %outargs, %summary_hash );
82             };
83              
84             # TODO: check if this could not be avoided
85             # as this looks like code duplication with Bio::MUST::Core
86             # This one too ?
87              
88             sub get_degap_scoreseq {
89 3     3 0 19 my $self = shift;
90 3         99 my $tmpseq = $self->seq;
91 3         99 my $score = $self->scoreseq;
92              
93             # Need brackets or else pos == 1
94 3         15 while ( (my $pos = index($tmpseq, '-') ) != -1 ) {
95 87         158 substr $tmpseq, $pos, 1, q{};
96 87         198 substr $score, $pos, 1, q{};
97             }
98              
99 3         30 return $score;
100             }
101              
102              
103             # aliases
104              
105             sub expect {
106 12     12 0 447 return shift->c_evalue;
107             }
108              
109             sub score {
110 12     12 0 8711 return shift->dom_score;
111             }
112              
113             sub num {
114 0     0 0   return shift->number;
115             }
116              
117             sub idx {
118 0     0 0   return shift->number-1;
119             }
120              
121             __PACKAGE__->meta->make_immutable;
122             1;
123              
124             __END__
125              
126             =pod
127              
128             =head1 NAME
129              
130             Bio::FastParsers::Hmmer::Standard::Domain - Internal class for standard HMMER parser
131              
132             =head1 VERSION
133              
134             version 0.213510
135              
136             =head1 SYNOPSIS
137              
138             # TODO
139              
140             =head1 DESCRIPTION
141              
142             # TODO
143              
144             =head1 AUTHOR
145              
146             Denis BAURAIN <denis.baurain@uliege.be>
147              
148             =head1 CONTRIBUTOR
149              
150             =for stopwords Arnaud DI FRANCO
151              
152             Arnaud DI FRANCO <arnaud.difranco@gmail.com>
153              
154             =head1 COPYRIGHT AND LICENSE
155              
156             This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN.
157              
158             This is free software; you can redistribute it and/or modify it under
159             the same terms as the Perl 5 programming language system itself.
160              
161             =cut