File Coverage

blib/lib/Bio/RNA/RNAaliSplit/WrapAnalyseDists.pm
Criterion Covered Total %
statement 27 176 15.3
branch 0 56 0.0
condition 0 12 0.0
subroutine 9 15 60.0
pod 0 3 0.0
total 36 262 13.7


line stmt bran cond sub pod time code
1             # -*-CPerl-*-
2             # Last changed Time-stamp: <2017-05-28 17:05:11 mtw>
3              
4             # Bio::RNA::RNAaliSplit::WrapAnalyseDists.pm: Wrapper for computing
5             # split decompositions
6             #
7             # Requires AnalyseDists executable from the ViennaRNA poackage
8             # available to the Perl interpreter
9              
10             package Bio::RNA::RNAaliSplit::WrapAnalyseDists;
11              
12 1     1   1666 use version; our $VERSION = qv('0.05');
  1         2  
  1         9  
13 1     1   137 use Carp;
  1         3  
  1         68  
14 1     1   14 use Data::Dumper;
  1         2  
  1         45  
15 1     1   5 use Moose;
  1         2  
  1         7  
16 1     1   9950 use IPC::Cmd qw(can_run run);
  1         4  
  1         111  
17 1     1   944 use Array::Set qw(set_diff);
  1         2406  
  1         96  
18 1     1   14 use Digest::MD5 qw(md5_base64);
  1         3  
  1         70  
19 1     1   10 use Path::Class;
  1         5  
  1         103  
20 1     1   10 use File::Path qw(make_path);
  1         3  
  1         2723  
21             #use diagnostics;
22              
23             my ($analysedists,$oodir);
24             my %sets = ();
25              
26             has 'basename' => (
27             is => 'rw',
28             isa => 'Str',
29             predicate => 'has_basename',
30             );
31              
32             has 'splits' => (
33             is => 'rw',
34             isa => 'ArrayRef',
35             default => sub { [] },
36             predicate => 'has_splits',
37             traits => ['Array'],
38             handles => {
39             allsplits => 'elements',
40             count => 'count',
41             add => 'push',
42             pop => 'pop',
43             },
44             );
45              
46             has 'nr_splits' => (
47             is => 'rw',
48             isa => 'Num',
49             predicate => 'has_nr_splits',
50             );
51              
52             has 'dim' => (
53             is => 'rw',
54             isa => 'Num',
55             predicate => 'has_dim',
56             );
57              
58             with 'FileDirUtil';
59             with 'Bio::RNA::RNAaliSplit::Roles';
60              
61             sub BUILD {
62 0     0 0   my $self = shift;
63 0           my $this_function = (caller(0))[3];
64 0 0         confess "ERROR [$this_function] \$self->ifile not available"
65             unless ($self->has_ifile);
66 0 0         $analysedists = can_run('AnalyseDists') or
67             croak "ERROR [$this_function] AnalyseDists not found";
68 0 0         unless($self->has_odir){
69 0 0         unless($self->has_dirnam){self->dirnam("as")}
  0            
70 0           $self->odir( [$self->ifile->dir,$self->dirnam] );
71             }
72 0           $oodir = $self->odir->subdir("analysedists");
73 0           my @created = make_path($oodir, {error => \my $err});
74 0 0         confess "ERROR [$this_function] could not create output directory $self->oodir"
75             if (@$err);
76 0           $self->dim( $self->_get_dim() );
77              
78             # do computation
79 0           $self->NeighborJoining();
80 0           $self->SplitDecomposition();
81 0           $self->nr_splits($self->count);
82             }
83              
84             sub NeighborJoining {
85             # TODO warn if negative branch lengths occur
86 0     0 0   my $self = shift;
87 0           my $this_function = (caller(0))[3];
88 0           my ($nj_outfilename,$nj_treefilename,$nj_out,$nj_tree);
89              
90 0 0         if ($self->has_basename){
91 0           $nj_outfilename = $self->basename.".nj.out";
92 0           $nj_treefilename = $self->basename.".nj.ps";
93             }
94             else{
95 0           $nj_outfilename = "nj.out";
96 0           $nj_treefilename = "nj.ps";
97             }
98 0           $nj_out = file($oodir,$nj_outfilename);
99 0           $nj_tree = file($oodir,$nj_treefilename);
100 0           open my $fh, ">", $nj_out;
101              
102 0           my $ad_cmd = $analysedists." -Xn < ".$self->ifile;
103 0           my ( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) =
104             run( command => $ad_cmd, verbose => 0 );
105 0 0         if( !$success ) {
106 0           print STDERR "ERROR [$this_function] Call to $analysedists unsuccessful\n";
107 0           print STDERR "ERROR: this is what the command printed:\n";
108 0           print join "", @$full_buf;
109 0           croak $!;
110             }
111 0           my $stdout_buffer = join "",@$stdout_buf;
112 0           my @out = split /\n/, $stdout_buffer;
113 0           foreach my $line( @out){print $fh $line,"\n"}
  0            
114 0           close($fh);
115 0           rename "nj.ps", $nj_tree;
116 0           $self->_parse_nj($stdout_buffer);
117             }
118              
119             # parse the output of AnalyseDists -Xn
120             # populate array of hashes, each holding two sets of nodes corresponding to splits
121             sub _parse_nj {
122 0     0     my ($self,$nj) = @_;
123 0           my $this_function = (caller(0))[3];
124 0           my %data = ();
125 0           my $num;
126 0           my $count = 1;
127 0           my @lines = split /\n/,$nj;
128 0           foreach my $line (@lines){
129 0           my @s1 = ();
130 0           my @set1 = ();
131 0           my @set2 = ();
132 0 0         next if ($line =~ m/^>\s+\D/);
133 0 0         if ($line =~ m/^>\s+(\d+)/){$num = $1;next}
  0            
  0            
134 0 0         last if ($count++ >= $num);
135 0           my @all = (1..$num);
136             #print " #### $line\n";
137 0 0         croak "ERROR [$this_function] Cannot parse neighbor joining graph line\n$line\n"
138             unless ($line =~ m/^\s*(\d+)\s+(\d+)\s+(\-?\d+\.\d+)\s+(\-?\d+\.\d+)/g);
139 0           my $i = $1;
140 0           my $j = $2;
141              
142 0           push @{$data{$i}}, $j;
  0            
143 0 0         if (exists $data{$j}){push @{$data{$i}}, @{$data{$j}} };
  0            
  0            
  0            
144             # print Dumper(\%data);
145 0           push @s1, $i; # populate set1
146 0           push @s1, @{$data{$i}};
  0            
147 0           @set1 = sort {$a <=> $b} @s1;
  0            
148 0           my @diff = set_diff(\@all, \@set1);
149 0           @set2 = sort {$a <=> $b} @{$diff[0]};
  0            
  0            
150 0           my $set1_key = md5_base64(join "_", @set1);
151 0           my $set2_key = md5_base64(join "_", @set2);
152 0 0 0       if (!exists($sets{$set1_key}) && !exists($sets{$set2_key})){
153 0           my $type;
154 0           $sets{$set1_key} = \@set1; # lookup table for previously seen sets
155 0           $sets{$set2_key} = \@set2;
156 0 0         next if (scalar(@set1) == "0"); # skip empty sets (ie input alignment)
157 0 0         next if (scalar(@set2) == "0");
158 0 0 0       if(scalar(@set1)==1||scalar(@set2)==1){$type="NJT"} # trivial
  0            
159 0           else{$type="NJN"} # non-trivial
160 0           $self->add( {S1=>\@set1,S2=>\@set2,ori=>"NJ",type=>$type} );
161             }
162             else{
163             #print STDERR "INFO [$this_function] previously identified sets \n@set1\n@set2\n";
164             }
165             # print Dumper(\@set1);
166             # print Dumper(\@set2);
167             # print "+++++++++++++++++++++++++++++++++++\n";
168             }
169             }
170              
171             sub SplitDecomposition {
172 0     0 0   my $self = shift;
173 0           my $this_function = (caller(0))[3];
174 0           my ($sd_outfilename,$sd_out);
175 0 0         if ($self->has_basename){$sd_outfilename = $self->basename.".sd.out"}
  0            
176 0           else{$sd_outfilename = "sd.out"}
177 0           $sd_out = file($oodir,$sd_outfilename);
178 0           open my $fh, ">", $sd_out;
179              
180 0           my $sd_cmd = $analysedists." -Xs < ".$self->ifile;
181 0           my ( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) =
182             run( command => $sd_cmd, verbose => 0 );
183 0 0         if( !$success ) {
184 0           print STDERR "ERROR [$this_function] Call to $analysedists unsuccessful\n";
185 0           print STDERR "ERROR: this is what the command printed:\n";
186 0           print join "", @$full_buf;
187 0           croak $!;
188             }
189 0           my $stdout_buffer = join "", @$stdout_buf;
190 0           my @out = split /\n/, $stdout_buffer;
191 0           foreach my $line( @out){print $fh $line,"\n"}
  0            
192 0           close($fh);
193 0           $self->_parse_sd($stdout_buffer); # parse split graph data
194             }
195              
196             # parse the output of AnalyseDists -Xs
197             # populate array of hashes, each holding two sets of nodes corresponding to splits
198             sub _parse_sd {
199 0     0     my ($self,$sd) = @_;
200 0           my $this_function = (caller(0))[3];
201 0           my $num;
202 0           my @lines = split /\n/,$sd;
203 0           foreach my $line (@lines){
204 0 0         next if ($line =~ m/^>\s+\D/);
205 0 0         if ($line =~ m/^>\s+(\d+)/){$num = $1;next}
  0            
  0            
206 0 0         last if ($line =~ m/^\s*\d+\.\d+\s+\:\s+\{\s+\[Split prime fraction\]\s+\}/g );
207             # print "$line\n";
208 0 0         croak "ERROR [$this_function] Cannot parse split graph line\n$line\n"
209             unless ($line =~ m/^\s*\d+\s+\d+\.\d+\s+:\s+\{\s+([\d+\s+]+)\|/g);
210 0           my @foo = split /\s+/, $1; # set 1
211 0           my @moo = (1 .. $self->dim);
212 0           my @bar = (); # set 2
213 0           foreach my $i (@moo){
214 0 0         push (@bar, $i) unless ( grep {$i == $_}@foo );
  0            
215             }
216 0           my @set1 = sort {$a <=> $b} @foo;
  0            
217 0           my @set2 = sort {$a <=> $b} @bar;
  0            
218 0           my $set1_key = md5_base64(join "_", @set1);
219 0           my $set2_key = md5_base64(join "_", @set2);
220 0 0 0       if (!exists($sets{$set1_key}) && !exists($sets{$set2_key})){
221 0           my $type;
222 0           $sets{$set1_key} = \@set1; # lookup table for previously seen sets
223 0           $sets{$set2_key} = \@set2;
224 0 0 0       if (scalar(@set1)==1 || scalar(@set2)==1){$type="SDT"} # trivial calse
  0            
225 0           else {$type="SDN"}
226 0           $self->add( {S1=>\@set1,S2=>\@set2,ori=>"SD",type=>$type} );
227             }
228             else{
229             # print STDERR "INFO [$this_function] previously identified sets \n@set1\n@set2\n";
230             }
231             }
232             }
233              
234             sub _get_dim {
235 0     0     my $self = shift;
236 0           my $this_function = (caller(0))[3];
237 0           my $dim = -1 ;
238 0 0         open my $fh, "<", $self->ifile or die $!;
239 0           while(<$fh>){
240 0 0         if (m/^>\s+X\s+(\d+)/){$dim = $1;last;}
  0            
  0            
241             }
242 0 0         croak "ERROR [$this_function] could not parse dimension from input matrix"
243             if ($dim == -1);
244 0           close($fh);
245 0           return $dim;
246             }
247              
248             1;
249