File Coverage

blib/lib/Bio/RNA/Barriers/Results.pm
Criterion Covered Total %
statement 82 140 58.5
branch 6 18 33.3
condition 2 12 16.6
subroutine 20 29 68.9
pod 16 16 100.0
total 126 215 58.6


line stmt bran cond sub pod time code
1             package Bio::RNA::Barriers::Results;
2             our $VERSION = '0.03';
3              
4 12     12   292 use 5.012;
  12         49  
5 12     12   80 use strict;
  12         36  
  12         303  
6 12     12   105 use warnings;
  12         29  
  12         428  
7              
8 12     12   71 use Moose;
  12         26  
  12         82  
9 12     12   87171 use MooseX::StrictConstructor;
  12         36  
  12         106  
10 12     12   40122 use namespace::autoclean;
  12         44  
  12         416  
11              
12 12     12   1222 use autodie qw(:all);
  12         48  
  12         97  
13 12     12   71073 use Scalar::Util qw( reftype );
  12         37  
  12         818  
14 12     12   614 use File::Spec::Functions qw(catpath splitpath);
  12         928  
  12         1856  
15              
16             has [qw( seq file_name )] => (
17             is => 'ro',
18             required => 1,
19             );
20              
21             has _mins => (
22             is => 'ro',
23             required => 1,
24             init_arg => 'mins'
25             );
26              
27             has $_ => (
28             is => 'ro',
29             predicate => "has_$_",
30             ) foreach qw(_volume _directory);
31              
32              
33 12     12   95 use overload q{""} => 'stringify';
  12         27  
  12         124  
34              
35             # Allow various calling styles of the constructor:
36             # new(barriers_handle): pass only file handle to read data from, file_name
37             # will be undef.
38             # new(barriers_handle, barriers_file_path): read data from handle and set
39             # file name from a given path
40             # new( barriers_file_path): open handle and read data from the passed
41             # path, set file name accordingly
42             # new(hash_ref): manual construction from required attributes
43             around BUILDARGS => sub {
44             my $orig = shift;
45             my $class = shift;
46              
47             return $class->$orig(@_) if @_ == 0 or @_ > 2; # no special handling
48              
49             # Read Barriers file from handle and set constructor arguments.
50             my $barriers_fh;
51             my @args;
52             if (@_ == 1 and not reftype $_[0]) { # file path given
53             my $file_path = shift;
54             my ($volume, $directory, $file_name) = splitpath $file_path;
55             push @args, (
56             _volume => $volume,
57             _directory => $directory,
58             file_name => $file_name,
59             );
60              
61             open $barriers_fh, '<', $file_path;
62             }
63             elsif (reftype $_[0] eq reftype \*STDIN) { # file handle given
64             if (@_ == 2) {
65             # Check if second arg may be a file name
66             return $class->$orig(@_) if reftype $_[1]; # it's no file name
67             push @args, (file_name => $_[1]);
68             }
69             else {
70             push @args, (file_name => undef); # no file name given
71             }
72             $barriers_fh = shift;
73             }
74             else {
75             return $class->$orig(@_);
76             }
77              
78             # Parse data from handle and create arguments.
79             push @args, $class->_read_barriers_file($barriers_fh);
80             return $class->$orig(@args);
81             };
82              
83             sub _read_barriers_file {
84 30     30   86 my ($self, $barriers_fh) = @_;
85 30         55 my %args;
86              
87             # Read sequence from first line
88 30         60 my $sequence = do {
89 30         812 my $line = <$barriers_fh>;
90 30 50       153 confess 'Could not read sequence from Barriers file handle'
91             unless defined $line;
92 30         102 chomp $line;
93 30         238 $line =~ s/^\s*//r; # trim leading space
94             };
95 30         130 $args{seq} = $sequence;
96              
97             # Continue reading minima
98 30         71 my ($father, @mins);
99 30         179 while (defined (my $line = <$barriers_fh>)) {
100 581         18230 my $min = Bio::RNA::Barriers::Minimum->new($line);
101 578         3603 push @mins, $min;
102             }
103              
104 27 50       113 confess 'No minima found in Barriers file'
105             unless @mins;
106              
107             # Set references to father mins (if any). This needs to be done after
108             # minima construction because, if several minima share the same
109             # energy, a father may have a higher index than the doughter min (and
110             # thus not have been parsed when parsing the doughter).
111 27         81 foreach my $min (@mins) {
112 555 100       1446 if ($min->has_father) {
113 526         15623 my $father = $mins[$min->father_index - 1];
114 526 50 33     15897 confess 'Inconsistent father indexing in Barriers file'
115             unless ref $father
116             and $father->index == $min->father_index;
117 526         15659 $min->father($father);
118             }
119             }
120              
121 27         90 $args{mins} = \@mins;
122              
123 27         192 return %args;
124             }
125              
126             # Dereferenced list of all minima. Select individual minima using
127             # get_min[s]().
128             sub mins {
129 405     405 1 761 my $self = shift;
130 405         606 return @{ $self->_mins };
  405         14079  
131             }
132              
133             sub get_mins {
134 372     372 1 795 my ($self, @min_indices) = @_;
135              
136 372         769 foreach my $min_index (@min_indices) {
137 372 50 33     1113 confess "There is no minimum $min_index in this file"
138             if $min_index < 1 or $min_index > $self->mins;
139 372         818 $min_index--; # Perl is 0-based, barriers 1-based
140             }
141              
142 372         607 my @mins = @{$self->_mins}[@min_indices]; # array slice
  372         11021  
143 372         875 return @mins;
144             }
145              
146             sub get_min {
147 372     372 1 85758 my ($self, $min_index) = @_;
148 372         822 my ($min) = $self->get_mins($min_index);
149 372         4518 return $min;
150             }
151              
152             sub get_global_min {
153 8     8 1 20 my ($self) = @_;
154 8         16 my $mfe_min = $self->get_min(1); # basin 1 == mfe basin
155 8         244 return $mfe_min;
156             }
157              
158             # Get the global minimum free energy.
159             sub mfe {
160 4     4 1 62 my ($self) = @_;
161 4         23 my $mfe = $self->get_global_min->mfe;
162 4         74 return $mfe;
163             }
164              
165             # Get the delta_E of the landscape, i. e. the energy difference between the
166             # (global) minimum free energy and and the highest energy encountered.
167             sub delta_energy {
168 4     4 1 1229 my ($self) = @_;
169             # Barrier height of the mfe basin is the explored energy bandwidth
170 4         9 my $delta_energy = $self->get_global_min->barrier_height;
171 4         97 return $delta_energy;
172             }
173              
174             # Get the maximum energy of any structure in the landscape.
175             sub max_energy {
176 2     2 1 1100 my ($self) = @_;
177             # Energy values from Bar file have only 2 digits precision.
178 2         9 my $max_energy = sprintf "%.2f", $self->mfe + $self->delta_energy;
179 2         68 return $max_energy;
180             }
181              
182             sub min_count {
183 9     9 1 6878 my $self = shift;
184 9         44 my $min_count = $self->mins;
185 9         59 return $min_count;
186             }
187              
188             # List of all minima connected to the mfe minimum (min 1).
189             sub connected_mins {
190 0     0 1 0 my $self = shift;
191 0         0 my @connected_mins = grep {$_->is_connected} $self->mins;
  0         0  
192 0         0 return @connected_mins;
193             }
194              
195             # List of indices of all connected minima (cf. connected_mins()).
196             sub connected_indices {
197 0     0 1 0 my $self = shift;
198 0         0 my @connected_indices = map {$_->index} $self->connected_mins;
  0         0  
199 0         0 return @connected_indices;
200             }
201              
202             # Re-index minima after deleting some of them.
203             sub _reindex {
204 0     0   0 my $self = shift;
205              
206 0         0 my $i = 1;
207 0         0 my @mins = $self->mins;
208              
209             # Update min indices.
210 0         0 $_->index($i++) foreach @mins;
211              
212             # Update father indices.
213 0         0 shift @mins; # min 1 is orphan
214 0         0 $_->father_index($_->father->index) foreach @mins;
215              
216 0         0 return;
217             }
218              
219             # Keep only connected minima and remove all others. The indices are
220             # remapped to 1..k for k kept minima.
221             # Returns (old) indices of all kept minima.
222             sub keep_connected {
223 0     0 1 0 my $self = shift;
224              
225 0         0 my @connected_indices = $self->connected_indices;
226 0         0 my @connected_mins = $self->connected_mins;
227              
228             # Update minima list
229 0         0 @{ $self->_mins } = @connected_mins;
  0         0  
230 0         0 $self->_reindex;
231              
232 0         0 return @connected_indices;
233             }
234              
235             # Given an ordered list of indices of all connected minima (as returned by
236             # RateMatrix::keep_connected()), delete all other minima and update their
237             # ancesters' basin size information accordingly.
238             # Arguments:
239             # ordered_connected_indices: ordered list of indices of (all???)
240             # connected minima.
241             sub update_connected {
242 0     0 1 0 my ($self, @ordered_connected_indices) = @_;
243              
244             # Go through all mins and check whether they're next in the connected
245             # (==kept) index list. If not, add to removal list.
246 0         0 my @connected_mins = $self->get_mins(@ordered_connected_indices);
247 0         0 my @removed_indices;
248 0         0 for my $min_index (1..$self->min_count) {
249 0 0       0 unless (@ordered_connected_indices) {
250             # No exclusions left, add rest and stop
251 0         0 push @removed_indices, $min_index..$self->min_count;
252 0         0 last;
253             }
254 0 0       0 if ($min_index == $ordered_connected_indices[0]) {
255 0         0 shift @ordered_connected_indices;
256 0         0 next;
257             }
258 0         0 push @removed_indices, $min_index; # min is deleted
259             }
260              
261 0         0 my @removed_mins = $self->get_mins(@removed_indices);
262 0         0 $self->_update_ancestors(@removed_mins);
263 0         0 @{ $self->_mins } = @connected_mins;
  0         0  
264 0         0 $self->_reindex;
265              
266 0         0 return;
267             }
268              
269             # Pass a list of ORDERED deleted minima and update their ancestors' bsize
270             # attributes.
271             sub _update_ancestors {
272 0     0   0 my ($self, @removed_mins) = @_;
273              
274             # If the bsize attributes are present, update the basin energy of all
275             # (grand)* father basins (substract energy of this one).
276             # The minima need to be processed in reversed order because, if an
277             # ancester of a removed min is also removed, its merged basin energy
278             # includes the basin energy of its child, and thus this contribution
279             # would be substracted multiple times from older ancesters. In
280             # reversed order, the child contribution is first substracted from the
281             # ancestors, and then the contribution of the removed ancestors does
282             # not include the child anymore.
283              
284 0 0       0 return unless $self->has_bsize;
285              
286 0         0 foreach my $removed_min (reverse @removed_mins) {
287 0         0 foreach my $ancestor_min ($removed_min->ancestors) {
288 0         0 $ancestor_min->_merged_basin_energy( # private writer
289             $ancestor_min->merged_basin_energy
290             - $removed_min->grad_basin_energy
291             );
292             }
293             }
294              
295 0         0 return;
296             }
297              
298             # Keep only the first k mins. If there are only k or less mins, do
299             # nothing.
300             # WARNING: THIS CAN DISCONNECT THE LANDSCAPE! The bar file will still look
301             # connected, however, modifying the rate matrix accordingly can lead to
302             # non-ergodicity (e.g. when basin 3 merged to 2, 2/3 merged to 1 because
303             # of a possible transition from 3 to 1, and basin 3 is then removed).
304             # To cope with that, call RateMatrix::keep_connected() and, with its
305             # return value, Results::update_connected() again.
306             # Arguments:
307             # max_min_count: number of minima to keep.
308             # Returns a list of all removed mins (may be empty).
309             sub keep_first_mins {
310 0     0 1 0 my ($self, $max_min_count) = @_;
311              
312 0         0 my @removed_mins = splice @{ $self->_mins }, $max_min_count;
  0         0  
313 0         0 $self->_update_ancestors(@removed_mins);
314              
315              
316 0         0 return @removed_mins;
317             }
318              
319              
320             # Check whether the stored minima contain information about the basin
321             # sizes as computed by Barriers' --bsize switch. Checks only the mfe
322             # basin (since all or neither min should have this information).
323             sub has_bsize {
324 0     0 1 0 my ($self) = @_;
325 0         0 my $has_bsize = $self->get_global_min->has_bsize;
326 0         0 return $has_bsize;
327             }
328              
329              
330             # Construct the file path to this Barriers file. Works only if it was
331             # actually parsed from a file (of course...).
332             # Returns the path as a string.
333             sub path {
334 0     0 1 0 my $self = shift;
335 0 0 0     0 confess 'Cannot construct path, missing volume, directory or file name'
      0        
336             unless defined $self->file_name
337             and $self->has_volume
338             and $self->has_directory
339             ;
340 0         0 my ($volume, $dir, $file_name)
341             = ($self->_volume, $self->_directory, $self->file_name);
342 0         0 my $path = catpath($volume, $dir, $file_name);
343 0         0 return $path;
344             }
345              
346              
347             # Convert back to Barriers file.
348             sub stringify {
349 12     12 1 2597 my $self = shift;
350              
351 12         433 my $header = (q{ } x 5) . $self->seq;
352 12         51 my @lines = ($header, map { $_->stringify } $self->mins);
  264         798  
353              
354 12         356 return join "\n", @lines;
355             }
356              
357             __PACKAGE__->meta->make_immutable;
358              
359             1; # End of Bio::RNA::Barriers::Results
360              
361              
362             __END__
363              
364             =pod
365              
366             =encoding UTF-8
367              
368             =head1 NAME
369              
370             Bio::RNA::Barriers::Results - Parse, query and manipulate results
371             of a I<Barriers> run
372              
373             =head1 SYNOPSIS
374              
375             use Bio::RNA::Barriers;
376              
377             # Read in a Barriers output file.
378             open my $barriers_handle, '<', $barriers_file;
379             my $bardat = Bio::RNA::Barriers::Results->new($barriers_handle);
380             # Or, even simpler, pass file name directly:
381             $bardat = Bio::RNA::Barriers::Results->new($barriers_file );
382              
383             # Print some info
384             print "There are ", $bardat->min_count, " minima.";
385             my $min3 = $bardat->get_min(3);
386             print $min3->grad_struct_count,
387             " structures lead to basin 3 via a gradient walk.\n"
388             if $min3->has_bsize;
389             print "Min ", $min3->index, " is ", ($min3->is_connected ? "" : "NOT"),
390             " connected to the mfe structure.\n";
391              
392             # Print the mfe basin line as in the results file
393             my $mfe_min = $bardat->get_global_min();
394             print "$mfe_min\n";
395              
396              
397             =head1 DESCRIPTION
398              
399             This is what you usually want to use. Pass a file name or a handle to the
400             constructor and you're golden. When querying a specific minimum by index, a
401             L<Bio::RNA::Barriers::Minimum> object is returned. For more details on its
402             methods, please refer to its documentation.
403              
404             =head1 METHODS
405              
406             =head3 Bio::RNA::Barriers::Results->new()
407              
408             Constructs the results object from a Barriers results file.
409              
410             =over 4
411              
412             =item Supported argument list types:
413              
414             =over 4
415              
416             =item new($barriers_file_path):
417              
418             Open handle and read data from the passed path, set file name accordingly.
419              
420             =item new($barriers_handle):
421              
422             A file handle to read data from, C<file_name()> will return
423             undef.
424              
425             =item new($barriers_handle, $barriers_file_path):
426              
427             Read data from handle and set file name from a given path.
428              
429             =item new($hash_ref):
430              
431             Manual construction from required attributes. Only for experts.
432              
433             =back
434              
435             =back
436              
437             =head3 $res->seq()
438              
439             Returns the RNA sequence for which the minima have been computed.
440              
441             =head3 $res->file_name()
442              
443             Name of the file from which the minima have been read. May be C<undef>.
444              
445             =head3 $res->mins()
446              
447             Returns a list of all minima. Useful for iteration in C<for> loops.
448              
449             =head3 $res->get_min($index)
450              
451             Return the single minimum given by a 1-based C<$index> (as used in the results
452             file).
453              
454             =head3 $res->get_mins(@indices)
455              
456             Return a list of minima given by a 1-based list of C<@indices> (as used in the
457             results file).
458              
459             =head3 $res->get_global_min()
460              
461             Returns the basin represented by the (global) mfe structure (i.e. basin 1).
462              
463             =head3 $res->mfe()
464              
465             Get the global minimum free energy.
466              
467             =head3 $res->delta_energy()
468              
469             Get the delta_E of the landscape, i. e. the energy difference between the
470             (global) minimum free energy and and the highest energy encountered.
471              
472             =head3 $res->max_energy()
473              
474             Get the maximum energy of any structure in the landscape.
475              
476             =head3 $res->min_count()
477              
478             Returns the total number of basins.
479              
480             =head3 $res->connected_mins()
481              
482             Returns a list of all minima connected to the mfe minimum (min 1).
483              
484             =head3 $res->connected_indices()
485              
486             Returns a list of indices of all connected minima (cf. C<connected_mins()>).
487              
488             =head3 $res->keep_connected()
489              
490             Keep only connected minima and remove all others. The indices are remapped to
491             1..k for k kept minima. Returns (old) indices of all kept minima.
492              
493             =head3 $res->update_connected(@connected_mins)
494              
495             Given an ordered list of indices of all connected minima (as returned by
496             C<RateMatrix::keep_connected()>), delete all other minima and update their
497             ancesters' basin size information accordingly.
498              
499             =head3 $res->keep_first_mins($min_count)
500              
501             Keep only the first C<$min_count> mins. (If there are only that many or less
502             minima in total, do nothing.)
503              
504             WARNING: THIS CAN DISCONNECT THE LANDSCAPE! The bar file will still look
505             connected, however, modifying the rate matrix accordingly can lead to
506             non-ergodicity (e.g. when basin 3 merged to 2, 2/3 merged to 1 because
507             of a possible transition from 3 to 1, and basin 3 is then removed). Returns a
508             list of all removed mins (may be empty).
509              
510             =head3 $res->has_bsize()
511              
512             Check whether the stored minima contain information about the basin sizes as
513             computed by Barriers' --bsize switch. Checks only the mfe basin (since all or
514             neither min should have this information).
515              
516             =head3 $res->path()
517              
518             Construct the file path to this Barriers file. Works only if it was actually
519             parsed from a file (of course...). Returns the path as a string.
520              
521             =head3 $res->stringify()
522              
523             Convert back to Barriers file. Also supports stringification overloading, i.
524             e. C<"$res"> is equivalent to C<$res-E<gt>stringify()>.
525              
526              
527             =head1 AUTHOR
528              
529             Felix Kuehnl, C<< <felix at bioinf.uni-leipzig.de> >>
530              
531             =head1 BUGS
532              
533             Please report any bugs or feature requests by raising an issue at
534             L<https://github.com/xileF1337/Bio-RNA-Barriers/issues>.
535              
536             You can also do so by mailing to C<bug-bio-rna-barmap at rt.cpan.org>,
537             or through the web interface at
538             L<https://rt.cpan.org/NoAuth/ReportBug.html?Queue=Bio-RNA-BarMap>. I will be
539             notified, and then you'll automatically be notified of progress on your bug as
540             I make changes.
541              
542              
543             =head1 SUPPORT
544              
545             You can find documentation for this module with the perldoc command.
546              
547             perldoc Bio::RNA::Barriers
548              
549              
550             You can also look for information at the official Barriers website:
551              
552             L<https://www.tbi.univie.ac.at/RNA/Barriers/>
553              
554              
555             =over 4
556              
557             =item * Github: the official repository
558              
559             L<https://github.com/xileF1337/Bio-RNA-Barriers>
560              
561             =item * RT: CPAN's request tracker (report bugs here)
562              
563             L<https://rt.cpan.org/NoAuth/Bugs.html?Dist=Bio-RNA-Barriers>
564              
565             =item * AnnoCPAN: Annotated CPAN documentation
566              
567             L<http://annocpan.org/dist/Bio-RNA-Barriers>
568              
569             =item * CPAN Ratings
570              
571             L<https://cpanratings.perl.org/d/Bio-RNA-Barriers>
572              
573             =item * Search CPAN
574              
575             L<https://metacpan.org/release/Bio-RNA-Barriers>
576              
577             =back
578              
579              
580             =head1 LICENSE AND COPYRIGHT
581              
582             Copyright 2019-2021 Felix Kuehnl.
583              
584             This program is free software: you can redistribute it and/or modify
585             it under the terms of the GNU General Public License as published by
586             the Free Software Foundation, either version 3 of the License, or
587             (at your option) any later version.
588              
589             This program is distributed in the hope that it will be useful,
590             but WITHOUT ANY WARRANTY; without even the implied warranty of
591             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
592             GNU General Public License for more details.
593              
594             You should have received a copy of the GNU General Public License
595             along with this program. If not, see L<http://www.gnu.org/licenses/>.
596              
597              
598              
599             =cut
600              
601              
602             # End of lib/Bio/RNA/Barriers/Results.pm