File Coverage

blib/lib/Bio/RNA/RNAaliSplit.pm
Criterion Covered Total %
statement 39 106 36.7
branch 0 26 0.0
condition n/a
subroutine 13 16 81.2
pod 0 2 0.0
total 52 150 34.6


line stmt bran cond sub pod time code
1             # -*-CPerl-*-
2             # Last changed Time-stamp: <2017-05-28 17:03:14 mtw>
3              
4             # Bio::RNA::RNAaliSplit.pm: Handler for horizontally splitting alignments
5              
6             package Bio::RNA::RNAaliSplit;
7              
8 1     1   96602 use version; our $VERSION = qv('0.05');
  1         2  
  1         8  
9 1     1   81 use Carp;
  1         2  
  1         50  
10 1     1   504 use Data::Dumper;
  1         6570  
  1         55  
11 1     1   442 use Moose;
  1         502316  
  1         7  
12 1     1   7314 use Moose::Util::TypeConstraints;
  1         4  
  1         14  
13 1     1   4066 use Path::Class;
  1         22162  
  1         56  
14 1     1   11 use File::Basename;
  1         3  
  1         53  
15 1     1   7 use IPC::Cmd qw(can_run run);
  1         2  
  1         45  
16 1     1   579 use Bio::AlignIO;
  1         98863  
  1         33  
17 1     1   575 use Storable 'dclone';
  1         2422  
  1         66  
18 1     1   7 use File::Path qw(make_path);
  1         3  
  1         103  
19 1     1   452 use FileDirUtil;
  1         59735  
  1         872  
20             #use diagnostics;
21              
22             subtype 'MyAln' => as class_type('Bio::AlignIO');
23              
24             coerce 'MyAln'
25             => from 'HashRef'
26             => via { Bio::AlignIO->new( %{ $_ } ) };
27              
28             has 'format' => (
29             is => 'ro',
30             isa => 'Str',
31             predicate => 'has_format',
32             default => 'ClustalW',
33             required => 1,
34             );
35              
36             has 'alignment' => (
37             is => 'rw',
38             isa => 'MyAln',
39             predicate => 'has_aln',
40             coerce => 1,
41             );
42              
43             has 'next_aln' => (
44             is => 'rw',
45             isa => 'Bio::SimpleAlign',
46             predicate => 'has_next_aln',
47             init_arg => undef,
48             );
49              
50             has 'dump' => (
51             is => 'rw',
52             isa => 'Num',
53             predicate => 'has_dump_flag',
54             );
55              
56             has 'hammingdistN' => (
57             is => 'rw',
58             isa => 'Num',
59             default => '-1',
60             predicate => 'has_hammingN',
61             init_arg => undef,
62             );
63              
64             has 'hammingdistX' => (
65             is => 'rw',
66             isa => 'Num',
67             default => '-1',
68             predicate => 'has_hammingX',
69             init_arg => undef,
70             );
71              
72             with 'FileDirUtil';
73             with 'Bio::RNA::RNAaliSplit::Roles';
74              
75             sub BUILD {
76 0     0 0   my $self = shift;
77 0           my $this_function = (caller(0))[3];
78 0 0         confess "ERROR [$this_function] \$self->ifile not available"
79             unless ($self->has_ifile);
80 0           $self->alignment({-file => $self->ifile,
81             -format => $self->format,
82             -displayname_flat => 1} ); # discard position in sequence IDs
83 0           $self->next_aln($self->alignment->next_aln);
84 0 0         unless($self->has_odir){
85 0 0         unless($self->has_dirnam){$self->dirnam("as")}
  0            
86 0           $self->odir( [$self->ifile->dir,$self->dirnam] );
87             }
88 0           my @created = make_path($self->odir, {error => \my $err});
89 0 0         confess "ERROR [$this_function] could not create output directory $self->odir"
90             if (@$err);
91 0           $self->set_ifilebn;
92              
93 0 0         if ($self->has_dump_flag){
94             # dump ifile as aln in ClustalW format to odir/input
95 0           my $iodir = $self->odir->subdir('input');
96 0           mkdir($iodir);
97 0           my $ialnfile = file($iodir,$self->ifilebn.".aln");
98 0           my $alnio = Bio::AlignIO->new(-file => ">$ialnfile",
99             -format => "ClustalW",
100             -flush => 0,
101             -displayname_flat => 1 );
102 0           my $aln2 = $self->next_aln->select_noncont((1..$self->next_aln->num_sequences));
103 0           $alnio->write_aln($aln2);
104             # end dump input aln file
105             }
106              
107 0 0         if ($self->next_aln->num_sequences == 2){ $self->_hamming() }
  0            
108             }
109              
110             sub dump_subalignment {
111 0     0 0   my ($self,$alipathsegment,$token,$what) = @_;
112 0           my $this_function = (caller(0))[3];
113 0           my ($aln,$aln2,$name);
114              
115 0 0         croak "ERROR [$this_function] argument 'token' not provided"
116             unless (defined($token));
117              
118             # create output path
119 0           my $ids = join "_", @$what;
120 0 0         unless (defined($alipathsegment)){$alipathsegment = "tmp"}
  0            
121 0           my $oodir = $self->odir->subdir($alipathsegment);
122 0           mkdir($oodir);
123              
124             # create info file
125 0           my $oinfofile = file($oodir,$token.".info");
126 0 0         open my $oinfo, ">", $oinfofile or die $!;
127 0           foreach my $entry (@$what){
128 0           my $key = $entry-1;
129 0           my $val = ${$self->next_aln}{_order}->{$key};
  0            
130 0           print $oinfo join "\t", ($entry, $val, "\n");
131             }
132 0           close($oinfo);
133              
134             # create subalignment in Clustal and Stockholm format
135 0           my $oalifile_clustal = file($oodir,$token.".aln");
136 0           my $oalifile_stockholm = file($oodir,$token.".stk");
137 0           my $oali_clustal = Bio::AlignIO->new(-file => ">$oalifile_clustal",
138             -format => "ClustalW",
139             -flush => 0,
140             -displayname_flat => 1 );
141 0           my $oali_stockholm = Bio::AlignIO->new(-file => ">$oalifile_stockholm",
142             -format => "Stockholm",
143             -flush => 0,
144             -displayname_flat => 1 );
145 0           $aln = $self->next_aln->select_noncont(@$what);
146 0           $oali_clustal->write_aln( $aln );
147 0           $oali_stockholm->write_aln( $aln );
148              
149             # create subalignment fasta file
150 0           my $ofafile = file($oodir,$token.".fa");
151 0           my $ofa = Bio::AlignIO->new(-file => ">$ofafile",
152             -format => "fasta",
153             -flush => 0,
154             -displayname_flat => 1 );
155 0           $aln2 = $aln->remove_gaps;
156 0           $ofa->write_aln( $aln2 );
157              
158             # extract sequences from alignment and dump to .seq file
159             # NOTE that these sequences do contain gap symbols, intentionally (!)
160             # these can then be replaced by Ns to compute eg hamming distance
161 0           my $oseqfile = file($oodir,$token.".seq");
162 0 0         open my $seqfile, ">", $oseqfile or die $!;
163 0           foreach my $seq ($aln->each_seq) {
164 0           print $seqfile $seq->seq,"\n";
165             }
166 0           close($seqfile);
167              
168 0           return ( $oalifile_clustal,$oalifile_stockholm );
169             }
170              
171             sub _hamming {
172 0     0     my $self = shift;
173 0           my $this_function = (caller(0))[3];
174 0           my $hamming = -1;
175 0 0         croak "ERROR [$this_function] cannot compute Hamming distance for $self->next_aln->num_sequences sequences"
176             if ($self->next_aln->num_sequences != 2);
177              
178 0           my $aln = $self->next_aln->select_noncont((1,2));
179              
180             # compute Hamming distance of the aligned sequences, replacing gaps with Ns
181 0           my $alnN = dclone($aln);
182 0 0         croak("ERROR [$this_function] cannot replace gaps with Ns")
183             unless ($alnN->map_chars('-','N') == 1);
184 0           my $seq1 = $alnN->get_seq_by_pos(1)->seq;
185 0           my $seq2 = $alnN->get_seq_by_pos(2)->seq;
186 0 0         croak "ERROR [$this_function] sequence length differs"
187             unless(length($seq1)==length($seq2));
188 0           my $hammingN = ($seq1 ^ $seq2) =~ tr/\001-\255//;
189 0           $self->hammingdistN($hammingN);
190              
191             # print $self->ifilebn,":\n";
192             # print ">>s1: $seq1\n";
193             # print ">>s2: $seq2\n";
194             # print "** dhN = ".$self->hammingdistN."\n";
195             # print "+++\n";
196             }
197              
198 1     1   11 no Moose;
  1         2  
  1         8  
199              
200              
201              
202             =head1 NAME
203              
204             Bio::RNA::RNAaliSplit - Split and deconvolute structural RNA multiple
205             sequence alignments
206              
207             =head1 VERSION
208              
209             Version 0.05
210              
211             =cut
212              
213             =head1 SYNOPSIS
214              
215             This module is a L<Moose> handler for horizontal splitting of
216             structural RNA multiple sequence alignments (MSA). It employs third
217             party tools (RNAalifold, RNAz, R-scape) for classification of
218             subalignments, each folding into a common consensus structure.
219              
220             =head1 AUTHOR
221              
222             Michael T. Wolfinger, C<< <michael at wolfinger.eu> >>
223              
224             =head1 BUGS
225              
226             Please report any bugs or feature requests to
227             C<bug-bio-rna-rnaalisplit at rt.cpan.org>, or through the web
228             interface at
229             L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Bio-RNA-RNAaliSplit>.
230             I will be notified, and then you'll automatically be notified of
231             progress on your bug as I make changes.
232              
233              
234             =head1 SUPPORT
235              
236             You can find documentation for this module with the perldoc command.
237              
238             perldoc Bio::RNA::RNAaliSplit
239              
240              
241             You can also look for information at:
242              
243             =over 4
244              
245             =item * RT: CPAN's request tracker (report bugs here)
246              
247             L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Bio-RNA-RNAaliSplit>
248              
249             =item * AnnoCPAN: Annotated CPAN documentation
250              
251             L<http://annocpan.org/dist/Bio-RNA-RNAaliSplit>
252              
253             =item * CPAN Ratings
254              
255             L<http://cpanratings.perl.org/d/Bio-RNA-RNAaliSplit>
256              
257             =item * Search CPAN
258              
259             L<http://search.cpan.org/dist/Bio-RNA-RNAaliSplit/>
260              
261             =back
262              
263              
264             =head1 LICENSE AND COPYRIGHT
265              
266             Copyright 2017 Michael T. Wolfinger <michael@wolfinger.eu> and <michael.wolfinger@univie.ac.at>
267              
268             This program is free software; you can redistribute it and/or
269             modify it under the terms of the GNU Affero General Public
270             License as published by the Free Software Foundation; either
271             version 3 of the License, or (at your option) any later version.
272              
273             This program is distributed in the hope that it will be useful,
274             but WITHOUT ANY WARRANTY; without even the implied warranty of
275             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
276             Affero General Public License for more details.
277              
278             You should have received a copy of the GNU Affero General Public
279             License along with this program. If not, see
280             L<http://www.gnu.org/licenses/>.
281              
282             =cut
283              
284             1; # End of Bio::RNA::RNAaliSplit
285              
286