| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# -*-CPerl-*- |
|
2
|
|
|
|
|
|
|
# Last changed Time-stamp: <2017-05-28 17:05:11 mtw> |
|
3
|
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
# Bio::RNA::RNAaliSplit::WrapAnalyseDists.pm: Wrapper for computing |
|
5
|
|
|
|
|
|
|
# split decompositions |
|
6
|
|
|
|
|
|
|
# |
|
7
|
|
|
|
|
|
|
# Requires AnalyseDists executable from the ViennaRNA poackage |
|
8
|
|
|
|
|
|
|
# available to the Perl interpreter |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
package Bio::RNA::RNAaliSplit::WrapAnalyseDists; |
|
11
|
|
|
|
|
|
|
|
|
12
|
1
|
|
|
1
|
|
1666
|
use version; our $VERSION = qv('0.05'); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
9
|
|
|
13
|
1
|
|
|
1
|
|
137
|
use Carp; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
68
|
|
|
14
|
1
|
|
|
1
|
|
14
|
use Data::Dumper; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
45
|
|
|
15
|
1
|
|
|
1
|
|
5
|
use Moose; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
7
|
|
|
16
|
1
|
|
|
1
|
|
9950
|
use IPC::Cmd qw(can_run run); |
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
111
|
|
|
17
|
1
|
|
|
1
|
|
944
|
use Array::Set qw(set_diff); |
|
|
1
|
|
|
|
|
2406
|
|
|
|
1
|
|
|
|
|
96
|
|
|
18
|
1
|
|
|
1
|
|
14
|
use Digest::MD5 qw(md5_base64); |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
70
|
|
|
19
|
1
|
|
|
1
|
|
10
|
use Path::Class; |
|
|
1
|
|
|
|
|
5
|
|
|
|
1
|
|
|
|
|
103
|
|
|
20
|
1
|
|
|
1
|
|
10
|
use File::Path qw(make_path); |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
2723
|
|
|
21
|
|
|
|
|
|
|
#use diagnostics; |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
my ($analysedists,$oodir); |
|
24
|
|
|
|
|
|
|
my %sets = (); |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
has 'basename' => ( |
|
27
|
|
|
|
|
|
|
is => 'rw', |
|
28
|
|
|
|
|
|
|
isa => 'Str', |
|
29
|
|
|
|
|
|
|
predicate => 'has_basename', |
|
30
|
|
|
|
|
|
|
); |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
has 'splits' => ( |
|
33
|
|
|
|
|
|
|
is => 'rw', |
|
34
|
|
|
|
|
|
|
isa => 'ArrayRef', |
|
35
|
|
|
|
|
|
|
default => sub { [] }, |
|
36
|
|
|
|
|
|
|
predicate => 'has_splits', |
|
37
|
|
|
|
|
|
|
traits => ['Array'], |
|
38
|
|
|
|
|
|
|
handles => { |
|
39
|
|
|
|
|
|
|
allsplits => 'elements', |
|
40
|
|
|
|
|
|
|
count => 'count', |
|
41
|
|
|
|
|
|
|
add => 'push', |
|
42
|
|
|
|
|
|
|
pop => 'pop', |
|
43
|
|
|
|
|
|
|
}, |
|
44
|
|
|
|
|
|
|
); |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
has 'nr_splits' => ( |
|
47
|
|
|
|
|
|
|
is => 'rw', |
|
48
|
|
|
|
|
|
|
isa => 'Num', |
|
49
|
|
|
|
|
|
|
predicate => 'has_nr_splits', |
|
50
|
|
|
|
|
|
|
); |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
has 'dim' => ( |
|
53
|
|
|
|
|
|
|
is => 'rw', |
|
54
|
|
|
|
|
|
|
isa => 'Num', |
|
55
|
|
|
|
|
|
|
predicate => 'has_dim', |
|
56
|
|
|
|
|
|
|
); |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
with 'FileDirUtil'; |
|
59
|
|
|
|
|
|
|
with 'Bio::RNA::RNAaliSplit::Roles'; |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
sub BUILD { |
|
62
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
|
63
|
0
|
|
|
|
|
|
my $this_function = (caller(0))[3]; |
|
64
|
0
|
0
|
|
|
|
|
confess "ERROR [$this_function] \$self->ifile not available" |
|
65
|
|
|
|
|
|
|
unless ($self->has_ifile); |
|
66
|
0
|
0
|
|
|
|
|
$analysedists = can_run('AnalyseDists') or |
|
67
|
|
|
|
|
|
|
croak "ERROR [$this_function] AnalyseDists not found"; |
|
68
|
0
|
0
|
|
|
|
|
unless($self->has_odir){ |
|
69
|
0
|
0
|
|
|
|
|
unless($self->has_dirnam){self->dirnam("as")} |
|
|
0
|
|
|
|
|
|
|
|
70
|
0
|
|
|
|
|
|
$self->odir( [$self->ifile->dir,$self->dirnam] ); |
|
71
|
|
|
|
|
|
|
} |
|
72
|
0
|
|
|
|
|
|
$oodir = $self->odir->subdir("analysedists"); |
|
73
|
0
|
|
|
|
|
|
my @created = make_path($oodir, {error => \my $err}); |
|
74
|
0
|
0
|
|
|
|
|
confess "ERROR [$this_function] could not create output directory $self->oodir" |
|
75
|
|
|
|
|
|
|
if (@$err); |
|
76
|
0
|
|
|
|
|
|
$self->dim( $self->_get_dim() ); |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# do computation |
|
79
|
0
|
|
|
|
|
|
$self->NeighborJoining(); |
|
80
|
0
|
|
|
|
|
|
$self->SplitDecomposition(); |
|
81
|
0
|
|
|
|
|
|
$self->nr_splits($self->count); |
|
82
|
|
|
|
|
|
|
} |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
sub NeighborJoining { |
|
85
|
|
|
|
|
|
|
# TODO warn if negative branch lengths occur |
|
86
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
|
87
|
0
|
|
|
|
|
|
my $this_function = (caller(0))[3]; |
|
88
|
0
|
|
|
|
|
|
my ($nj_outfilename,$nj_treefilename,$nj_out,$nj_tree); |
|
89
|
|
|
|
|
|
|
|
|
90
|
0
|
0
|
|
|
|
|
if ($self->has_basename){ |
|
91
|
0
|
|
|
|
|
|
$nj_outfilename = $self->basename.".nj.out"; |
|
92
|
0
|
|
|
|
|
|
$nj_treefilename = $self->basename.".nj.ps"; |
|
93
|
|
|
|
|
|
|
} |
|
94
|
|
|
|
|
|
|
else{ |
|
95
|
0
|
|
|
|
|
|
$nj_outfilename = "nj.out"; |
|
96
|
0
|
|
|
|
|
|
$nj_treefilename = "nj.ps"; |
|
97
|
|
|
|
|
|
|
} |
|
98
|
0
|
|
|
|
|
|
$nj_out = file($oodir,$nj_outfilename); |
|
99
|
0
|
|
|
|
|
|
$nj_tree = file($oodir,$nj_treefilename); |
|
100
|
0
|
|
|
|
|
|
open my $fh, ">", $nj_out; |
|
101
|
|
|
|
|
|
|
|
|
102
|
0
|
|
|
|
|
|
my $ad_cmd = $analysedists." -Xn < ".$self->ifile; |
|
103
|
0
|
|
|
|
|
|
my ( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) = |
|
104
|
|
|
|
|
|
|
run( command => $ad_cmd, verbose => 0 ); |
|
105
|
0
|
0
|
|
|
|
|
if( !$success ) { |
|
106
|
0
|
|
|
|
|
|
print STDERR "ERROR [$this_function] Call to $analysedists unsuccessful\n"; |
|
107
|
0
|
|
|
|
|
|
print STDERR "ERROR: this is what the command printed:\n"; |
|
108
|
0
|
|
|
|
|
|
print join "", @$full_buf; |
|
109
|
0
|
|
|
|
|
|
croak $!; |
|
110
|
|
|
|
|
|
|
} |
|
111
|
0
|
|
|
|
|
|
my $stdout_buffer = join "",@$stdout_buf; |
|
112
|
0
|
|
|
|
|
|
my @out = split /\n/, $stdout_buffer; |
|
113
|
0
|
|
|
|
|
|
foreach my $line( @out){print $fh $line,"\n"} |
|
|
0
|
|
|
|
|
|
|
|
114
|
0
|
|
|
|
|
|
close($fh); |
|
115
|
0
|
|
|
|
|
|
rename "nj.ps", $nj_tree; |
|
116
|
0
|
|
|
|
|
|
$self->_parse_nj($stdout_buffer); |
|
117
|
|
|
|
|
|
|
} |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
# parse the output of AnalyseDists -Xn |
|
120
|
|
|
|
|
|
|
# populate array of hashes, each holding two sets of nodes corresponding to splits |
|
121
|
|
|
|
|
|
|
sub _parse_nj { |
|
122
|
0
|
|
|
0
|
|
|
my ($self,$nj) = @_; |
|
123
|
0
|
|
|
|
|
|
my $this_function = (caller(0))[3]; |
|
124
|
0
|
|
|
|
|
|
my %data = (); |
|
125
|
0
|
|
|
|
|
|
my $num; |
|
126
|
0
|
|
|
|
|
|
my $count = 1; |
|
127
|
0
|
|
|
|
|
|
my @lines = split /\n/,$nj; |
|
128
|
0
|
|
|
|
|
|
foreach my $line (@lines){ |
|
129
|
0
|
|
|
|
|
|
my @s1 = (); |
|
130
|
0
|
|
|
|
|
|
my @set1 = (); |
|
131
|
0
|
|
|
|
|
|
my @set2 = (); |
|
132
|
0
|
0
|
|
|
|
|
next if ($line =~ m/^>\s+\D/); |
|
133
|
0
|
0
|
|
|
|
|
if ($line =~ m/^>\s+(\d+)/){$num = $1;next} |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
134
|
0
|
0
|
|
|
|
|
last if ($count++ >= $num); |
|
135
|
0
|
|
|
|
|
|
my @all = (1..$num); |
|
136
|
|
|
|
|
|
|
#print " #### $line\n"; |
|
137
|
0
|
0
|
|
|
|
|
croak "ERROR [$this_function] Cannot parse neighbor joining graph line\n$line\n" |
|
138
|
|
|
|
|
|
|
unless ($line =~ m/^\s*(\d+)\s+(\d+)\s+(\-?\d+\.\d+)\s+(\-?\d+\.\d+)/g); |
|
139
|
0
|
|
|
|
|
|
my $i = $1; |
|
140
|
0
|
|
|
|
|
|
my $j = $2; |
|
141
|
|
|
|
|
|
|
|
|
142
|
0
|
|
|
|
|
|
push @{$data{$i}}, $j; |
|
|
0
|
|
|
|
|
|
|
|
143
|
0
|
0
|
|
|
|
|
if (exists $data{$j}){push @{$data{$i}}, @{$data{$j}} }; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
# print Dumper(\%data); |
|
145
|
0
|
|
|
|
|
|
push @s1, $i; # populate set1 |
|
146
|
0
|
|
|
|
|
|
push @s1, @{$data{$i}}; |
|
|
0
|
|
|
|
|
|
|
|
147
|
0
|
|
|
|
|
|
@set1 = sort {$a <=> $b} @s1; |
|
|
0
|
|
|
|
|
|
|
|
148
|
0
|
|
|
|
|
|
my @diff = set_diff(\@all, \@set1); |
|
149
|
0
|
|
|
|
|
|
@set2 = sort {$a <=> $b} @{$diff[0]}; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
150
|
0
|
|
|
|
|
|
my $set1_key = md5_base64(join "_", @set1); |
|
151
|
0
|
|
|
|
|
|
my $set2_key = md5_base64(join "_", @set2); |
|
152
|
0
|
0
|
0
|
|
|
|
if (!exists($sets{$set1_key}) && !exists($sets{$set2_key})){ |
|
153
|
0
|
|
|
|
|
|
my $type; |
|
154
|
0
|
|
|
|
|
|
$sets{$set1_key} = \@set1; # lookup table for previously seen sets |
|
155
|
0
|
|
|
|
|
|
$sets{$set2_key} = \@set2; |
|
156
|
0
|
0
|
|
|
|
|
next if (scalar(@set1) == "0"); # skip empty sets (ie input alignment) |
|
157
|
0
|
0
|
|
|
|
|
next if (scalar(@set2) == "0"); |
|
158
|
0
|
0
|
0
|
|
|
|
if(scalar(@set1)==1||scalar(@set2)==1){$type="NJT"} # trivial |
|
|
0
|
|
|
|
|
|
|
|
159
|
0
|
|
|
|
|
|
else{$type="NJN"} # non-trivial |
|
160
|
0
|
|
|
|
|
|
$self->add( {S1=>\@set1,S2=>\@set2,ori=>"NJ",type=>$type} ); |
|
161
|
|
|
|
|
|
|
} |
|
162
|
|
|
|
|
|
|
else{ |
|
163
|
|
|
|
|
|
|
#print STDERR "INFO [$this_function] previously identified sets \n@set1\n@set2\n"; |
|
164
|
|
|
|
|
|
|
} |
|
165
|
|
|
|
|
|
|
# print Dumper(\@set1); |
|
166
|
|
|
|
|
|
|
# print Dumper(\@set2); |
|
167
|
|
|
|
|
|
|
# print "+++++++++++++++++++++++++++++++++++\n"; |
|
168
|
|
|
|
|
|
|
} |
|
169
|
|
|
|
|
|
|
} |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
sub SplitDecomposition { |
|
172
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
|
173
|
0
|
|
|
|
|
|
my $this_function = (caller(0))[3]; |
|
174
|
0
|
|
|
|
|
|
my ($sd_outfilename,$sd_out); |
|
175
|
0
|
0
|
|
|
|
|
if ($self->has_basename){$sd_outfilename = $self->basename.".sd.out"} |
|
|
0
|
|
|
|
|
|
|
|
176
|
0
|
|
|
|
|
|
else{$sd_outfilename = "sd.out"} |
|
177
|
0
|
|
|
|
|
|
$sd_out = file($oodir,$sd_outfilename); |
|
178
|
0
|
|
|
|
|
|
open my $fh, ">", $sd_out; |
|
179
|
|
|
|
|
|
|
|
|
180
|
0
|
|
|
|
|
|
my $sd_cmd = $analysedists." -Xs < ".$self->ifile; |
|
181
|
0
|
|
|
|
|
|
my ( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) = |
|
182
|
|
|
|
|
|
|
run( command => $sd_cmd, verbose => 0 ); |
|
183
|
0
|
0
|
|
|
|
|
if( !$success ) { |
|
184
|
0
|
|
|
|
|
|
print STDERR "ERROR [$this_function] Call to $analysedists unsuccessful\n"; |
|
185
|
0
|
|
|
|
|
|
print STDERR "ERROR: this is what the command printed:\n"; |
|
186
|
0
|
|
|
|
|
|
print join "", @$full_buf; |
|
187
|
0
|
|
|
|
|
|
croak $!; |
|
188
|
|
|
|
|
|
|
} |
|
189
|
0
|
|
|
|
|
|
my $stdout_buffer = join "", @$stdout_buf; |
|
190
|
0
|
|
|
|
|
|
my @out = split /\n/, $stdout_buffer; |
|
191
|
0
|
|
|
|
|
|
foreach my $line( @out){print $fh $line,"\n"} |
|
|
0
|
|
|
|
|
|
|
|
192
|
0
|
|
|
|
|
|
close($fh); |
|
193
|
0
|
|
|
|
|
|
$self->_parse_sd($stdout_buffer); # parse split graph data |
|
194
|
|
|
|
|
|
|
} |
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
# parse the output of AnalyseDists -Xs |
|
197
|
|
|
|
|
|
|
# populate array of hashes, each holding two sets of nodes corresponding to splits |
|
198
|
|
|
|
|
|
|
sub _parse_sd { |
|
199
|
0
|
|
|
0
|
|
|
my ($self,$sd) = @_; |
|
200
|
0
|
|
|
|
|
|
my $this_function = (caller(0))[3]; |
|
201
|
0
|
|
|
|
|
|
my $num; |
|
202
|
0
|
|
|
|
|
|
my @lines = split /\n/,$sd; |
|
203
|
0
|
|
|
|
|
|
foreach my $line (@lines){ |
|
204
|
0
|
0
|
|
|
|
|
next if ($line =~ m/^>\s+\D/); |
|
205
|
0
|
0
|
|
|
|
|
if ($line =~ m/^>\s+(\d+)/){$num = $1;next} |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
206
|
0
|
0
|
|
|
|
|
last if ($line =~ m/^\s*\d+\.\d+\s+\:\s+\{\s+\[Split prime fraction\]\s+\}/g ); |
|
207
|
|
|
|
|
|
|
# print "$line\n"; |
|
208
|
0
|
0
|
|
|
|
|
croak "ERROR [$this_function] Cannot parse split graph line\n$line\n" |
|
209
|
|
|
|
|
|
|
unless ($line =~ m/^\s*\d+\s+\d+\.\d+\s+:\s+\{\s+([\d+\s+]+)\|/g); |
|
210
|
0
|
|
|
|
|
|
my @foo = split /\s+/, $1; # set 1 |
|
211
|
0
|
|
|
|
|
|
my @moo = (1 .. $self->dim); |
|
212
|
0
|
|
|
|
|
|
my @bar = (); # set 2 |
|
213
|
0
|
|
|
|
|
|
foreach my $i (@moo){ |
|
214
|
0
|
0
|
|
|
|
|
push (@bar, $i) unless ( grep {$i == $_}@foo ); |
|
|
0
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
} |
|
216
|
0
|
|
|
|
|
|
my @set1 = sort {$a <=> $b} @foo; |
|
|
0
|
|
|
|
|
|
|
|
217
|
0
|
|
|
|
|
|
my @set2 = sort {$a <=> $b} @bar; |
|
|
0
|
|
|
|
|
|
|
|
218
|
0
|
|
|
|
|
|
my $set1_key = md5_base64(join "_", @set1); |
|
219
|
0
|
|
|
|
|
|
my $set2_key = md5_base64(join "_", @set2); |
|
220
|
0
|
0
|
0
|
|
|
|
if (!exists($sets{$set1_key}) && !exists($sets{$set2_key})){ |
|
221
|
0
|
|
|
|
|
|
my $type; |
|
222
|
0
|
|
|
|
|
|
$sets{$set1_key} = \@set1; # lookup table for previously seen sets |
|
223
|
0
|
|
|
|
|
|
$sets{$set2_key} = \@set2; |
|
224
|
0
|
0
|
0
|
|
|
|
if (scalar(@set1)==1 || scalar(@set2)==1){$type="SDT"} # trivial calse |
|
|
0
|
|
|
|
|
|
|
|
225
|
0
|
|
|
|
|
|
else {$type="SDN"} |
|
226
|
0
|
|
|
|
|
|
$self->add( {S1=>\@set1,S2=>\@set2,ori=>"SD",type=>$type} ); |
|
227
|
|
|
|
|
|
|
} |
|
228
|
|
|
|
|
|
|
else{ |
|
229
|
|
|
|
|
|
|
# print STDERR "INFO [$this_function] previously identified sets \n@set1\n@set2\n"; |
|
230
|
|
|
|
|
|
|
} |
|
231
|
|
|
|
|
|
|
} |
|
232
|
|
|
|
|
|
|
} |
|
233
|
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
sub _get_dim { |
|
235
|
0
|
|
|
0
|
|
|
my $self = shift; |
|
236
|
0
|
|
|
|
|
|
my $this_function = (caller(0))[3]; |
|
237
|
0
|
|
|
|
|
|
my $dim = -1 ; |
|
238
|
0
|
0
|
|
|
|
|
open my $fh, "<", $self->ifile or die $!; |
|
239
|
0
|
|
|
|
|
|
while(<$fh>){ |
|
240
|
0
|
0
|
|
|
|
|
if (m/^>\s+X\s+(\d+)/){$dim = $1;last;} |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
} |
|
242
|
0
|
0
|
|
|
|
|
croak "ERROR [$this_function] could not parse dimension from input matrix" |
|
243
|
|
|
|
|
|
|
if ($dim == -1); |
|
244
|
0
|
|
|
|
|
|
close($fh); |
|
245
|
0
|
|
|
|
|
|
return $dim; |
|
246
|
|
|
|
|
|
|
} |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
1; |
|
249
|
|
|
|
|
|
|
|