File Coverage

blib/lib/Bio/RNA/RNAaliSplit/WrapAnalyseDists.pm
Criterion Covered Total %
statement 33 182 18.1
branch 0 54 0.0
condition 0 12 0.0
subroutine 11 17 64.7
pod 0 3 0.0
total 44 268 16.4


line stmt bran cond sub pod time code
1             # -*-CPerl-*-
2             # Last changed Time-stamp: <2017-03-09 16:16:41 michl>
3              
4             # Bio::RNA::RNAaliSplit::WrapAnalyseDists.pm: Wrapper for computing
5             # split decompositions
6             #
7             # Requires AnalyseDists executable from the ViennaRNA poackage
8             # available to the Perl interpreter
9              
10             package Bio::RNA::RNAaliSplit::WrapAnalyseDists;
11              
12 1     1   1400 use version; our $VERSION = qv('0.04');
  1         2  
  1         7  
13 1     1   73 use Carp;
  1         1  
  1         62  
14 1     1   4 use Data::Dumper;
  1         1  
  1         36  
15 1     1   4 use Moose;
  1         2  
  1         6  
16 1     1   4734 use Moose::Util::TypeConstraints;
  1         2  
  1         9  
17 1     1   1263 use Path::Class::File;
  1         1  
  1         20  
18 1     1   3 use Path::Class::Dir;
  1         2  
  1         20  
19 1     1   3 use Path::Class;
  1         1  
  1         62  
20 1     1   4 use IPC::Cmd qw(can_run run);
  1         1  
  1         48  
21 1     1   627 use Array::Set qw(set_diff);
  1         1332  
  1         57  
22 1     1   6 use Digest::MD5 qw(md5_base64);
  1         2  
  1         1454  
23              
24             my ($analysedists,$oodir);
25             my %sets = ();
26              
27             has 'basename' => (
28             is => 'rw',
29             isa => 'Str',
30             predicate => 'has_basename',
31             );
32              
33             has 'splits' => (
34             is => 'rw',
35             isa => 'ArrayRef',
36             default => sub { [] },
37             predicate => 'has_splits',
38             traits => ['Array'],
39             handles => {
40             allsplits => 'elements',
41             count => 'count',
42             add => 'push',
43             pop => 'pop',
44             },
45             );
46              
47             has 'nr_splits' => (
48             is => 'rw',
49             isa => 'Num',
50             predicate => 'has_nr_splits',
51             );
52              
53             has 'dim' => (
54             is => 'rw',
55             isa => 'Num',
56             predicate => 'has_dim',
57             );
58              
59             with 'Bio::RNA::RNAaliSplit::FileDir';
60              
61             sub BUILD {
62 0     0 0   my $self = shift;
63 0           my $this_function = (caller(0))[3];
64 0 0         confess "ERROR [$this_function] \$self->ifile not available"
65             unless ($self->has_ifile);
66 0 0         $analysedists = can_run('AnalyseDists') or
67             croak "ERROR [$this_function] AnalyseDists not found";
68 0 0         unless($self->has_odir){
69 0 0         unless($self->has_odirn){self->odirname("as")}
  0            
70 0           $self->odir( [$self->ifile->dir,$self->odirn] );
71 0           mkdir($self->odir);
72             }
73 0           $oodir = $self->odir->subdir("analysedists");
74 0           mkdir($oodir);
75 0           $self->dim( $self->_get_dim() );
76              
77             # do computation
78 0           $self->NeighborJoining();
79 0           $self->SplitDecomposition();
80 0           $self->nr_splits($self->count);
81             }
82              
83             sub NeighborJoining {
84             # TODO warn if negative branch lengths occur
85 0     0 0   my $self = shift;
86 0           my $this_function = (caller(0))[3];
87 0           my ($nj_outfilename,$nj_treefilename,$nj_out,$nj_tree);
88              
89 0 0         if ($self->has_basename){
90 0           $nj_outfilename = $self->basename.".nj.out";
91 0           $nj_treefilename = $self->basename.".nj.ps";
92             }
93             else{
94 0           $nj_outfilename = "nj.out";
95 0           $nj_treefilename = "nj.ps";
96             }
97 0           $nj_out = file($oodir,$nj_outfilename);
98 0           $nj_tree = file($oodir,$nj_treefilename);
99 0           open my $fh, ">", $nj_out;
100              
101 0           my $ad_cmd = $analysedists." -Xn < ".$self->ifile;
102 0           my ( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) =
103             run( command => $ad_cmd, verbose => 0 );
104 0 0         if( !$success ) {
105 0           print STDERR "ERROR [$this_function] Call to $analysedists unsuccessful\n";
106 0           print STDERR "ERROR: this is what the command printed:\n";
107 0           print join "", @$full_buf;
108 0           croak $!;
109             }
110 0           my $stdout_buffer = join "",@$stdout_buf;
111 0           my @out = split /\n/, $stdout_buffer;
112 0           foreach my $line( @out){print $fh $line,"\n"}
  0            
113 0           close($fh);
114 0           rename "nj.ps", $nj_tree;
115 0           $self->_parse_nj($stdout_buffer);
116             }
117              
118             # parse the output of AnalyseDists -Xn
119             # populate array of hashes, each holding two sets of nodes corresponding to splits
120             sub _parse_nj {
121 0     0     my ($self,$nj) = @_;
122 0           my $this_function = (caller(0))[3];
123 0           my %data = ();
124 0           my $num;
125 0           my $count = 1;
126 0           my @lines = split /\n/,$nj;
127 0           foreach my $line (@lines){
128 0           my @s1 = ();
129 0           my @set1 = ();
130 0           my @set2 = ();
131 0 0         next if ($line =~ m/^>\s+\D/);
132 0 0         if ($line =~ m/^>\s+(\d+)/){$num = $1;next}
  0            
  0            
133 0 0         last if ($count++ >= $num);
134 0           my @all = (1..$num);
135             #print " #### $line\n";
136 0 0         croak "ERROR [$this_function] Cannot parse neighbor joining graph line\n$line\n"
137             unless ($line =~ m/^\s*(\d+)\s+(\d+)\s+(\-?\d+\.\d+)\s+(\-?\d+\.\d+)/g);
138 0           my $i = $1;
139 0           my $j = $2;
140              
141 0           push @{$data{$i}}, $j;
  0            
142 0 0         if (exists $data{$j}){push @{$data{$i}}, @{$data{$j}} };
  0            
  0            
  0            
143             # print Dumper(\%data);
144 0           push @s1, $i; # populate set1
145 0           push @s1, @{$data{$i}};
  0            
146 0           @set1 = sort {$a <=> $b} @s1;
  0            
147 0           my @diff = set_diff(\@all, \@set1);
148 0           @set2 = sort {$a <=> $b} @{$diff[0]};
  0            
  0            
149 0           my $set1_key = md5_base64(join "_", @set1);
150 0           my $set2_key = md5_base64(join "_", @set2);
151 0 0 0       if (!exists($sets{$set1_key}) && !exists($sets{$set2_key})){
152 0           my $type;
153 0           $sets{$set1_key} = \@set1; # lookup table for previously seen sets
154 0           $sets{$set2_key} = \@set2;
155 0 0         next if (scalar(@set1) == "0"); # skip empty sets (ie input alignment)
156 0 0         next if (scalar(@set2) == "0");
157 0 0 0       if(scalar(@set1)==1||scalar(@set2)==1){$type="NJT"} # trivial
  0            
158 0           else{$type="NJN"} # non-trivial
159 0           $self->add( {S1=>\@set1,S2=>\@set2,ori=>"NJ",type=>$type} );
160             }
161             else{
162             #print STDERR "INFO [$this_function] previously identified sets \n@set1\n@set2\n";
163             }
164             # print Dumper(\@set1);
165             # print Dumper(\@set2);
166             # print "+++++++++++++++++++++++++++++++++++\n";
167             }
168             }
169              
170             sub SplitDecomposition {
171 0     0 0   my $self = shift;
172 0           my $this_function = (caller(0))[3];
173 0           my ($sd_outfilename,$sd_out);
174 0 0         if ($self->has_basename){$sd_outfilename = $self->basename.".sd.out"}
  0            
175 0           else{$sd_outfilename = "sd.out"}
176 0           $sd_out = file($oodir,$sd_outfilename);
177 0           open my $fh, ">", $sd_out;
178              
179 0           my $sd_cmd = $analysedists." -Xs < ".$self->ifile;
180 0           my ( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) =
181             run( command => $sd_cmd, verbose => 0 );
182 0 0         if( !$success ) {
183 0           print STDERR "ERROR [$this_function] Call to $analysedists unsuccessful\n";
184 0           print STDERR "ERROR: this is what the command printed:\n";
185 0           print join "", @$full_buf;
186 0           croak $!;
187             }
188 0           my $stdout_buffer = join "", @$stdout_buf;
189 0           my @out = split /\n/, $stdout_buffer;
190 0           foreach my $line( @out){print $fh $line,"\n"}
  0            
191 0           close($fh);
192 0           $self->_parse_sd($stdout_buffer); # parse split graph data
193             }
194              
195             # parse the output of AnalyseDists -Xs
196             # populate array of hashes, each holding two sets of nodes corresponding to splits
197             sub _parse_sd {
198 0     0     my ($self,$sd) = @_;
199 0           my $this_function = (caller(0))[3];
200 0           my $num;
201 0           my @lines = split /\n/,$sd;
202 0           foreach my $line (@lines){
203 0 0         next if ($line =~ m/^>\s+\D/);
204 0 0         if ($line =~ m/^>\s+(\d+)/){$num = $1;next}
  0            
  0            
205 0 0         last if ($line =~ m/^\s*\d+\.\d+\s+\:\s+\{\s+\[Split prime fraction\]\s+\}/g );
206             # print "$line\n";
207 0 0         croak "ERROR [$this_function] Cannot parse split graph line\n$line\n"
208             unless ($line =~ m/^\s*\d+\s+\d+\.\d+\s+:\s+\{\s+([\d+\s+]+)\|/g);
209 0           my @foo = split /\s+/, $1; # set 1
210 0           my @moo = (1 .. $self->dim);
211 0           my @bar = (); # set 2
212 0           foreach my $i (@moo){
213 0 0         push (@bar, $i) unless ( grep {$i == $_}@foo );
  0            
214             }
215 0           my @set1 = sort {$a <=> $b} @foo;
  0            
216 0           my @set2 = sort {$a <=> $b} @bar;
  0            
217 0           my $set1_key = md5_base64(join "_", @set1);
218 0           my $set2_key = md5_base64(join "_", @set2);
219 0 0 0       if (!exists($sets{$set1_key}) && !exists($sets{$set2_key})){
220 0           my $type;
221 0           $sets{$set1_key} = \@set1; # lookup table for previously seen sets
222 0           $sets{$set2_key} = \@set2;
223 0 0 0       if (scalar(@set1)==1 || scalar(@set2)==1){$type="SDT"} # trivial calse
  0            
224 0           else {$type="SDN"}
225 0           $self->add( {S1=>\@set1,S2=>\@set2,ori=>"SD",type=>$type} );
226             }
227             else{
228             # print STDERR "INFO [$this_function] previously identified sets \n@set1\n@set2\n";
229             }
230             }
231             }
232              
233             sub _get_dim {
234 0     0     my $self = shift;
235 0           my $this_function = (caller(0))[3];
236 0           my $dim = -1 ;
237 0 0         open my $fh, "<", $self->ifile or die $!;
238 0           while(<$fh>){
239 0 0         if (m/^>\s+X\s+(\d+)/){$dim = $1;last;}
  0            
  0            
240             }
241 0 0         croak "ERROR [$this_function] could not parse dimension from input matrix"
242             if ($dim == -1);
243 0           close($fh);
244 0           return $dim;
245             }
246              
247             1;
248