File Coverage

blib/lib/Bio/RNA/Barriers/Results.pm
Criterion Covered Total %
statement 70 131 53.4
branch 6 18 33.3
condition 2 12 16.6
subroutine 16 26 61.5
pod 13 13 100.0
total 107 200 53.5


line stmt bran cond sub pod time code
1             package Bio::RNA::Barriers::Results;
2             our $VERSION = '0.01';
3              
4 11     11   287 use 5.012;
  11         47  
5 11     11   66 use strict;
  11         25  
  11         290  
6 11     11   61 use warnings;
  11         25  
  11         384  
7              
8 11     11   62 use Moose;
  11         25  
  11         85  
9 11     11   80287 use MooseX::StrictConstructor;
  11         34  
  11         96  
10 11     11   38734 use namespace::autoclean;
  11         33  
  11         462  
11              
12 11     11   1167 use autodie qw(:all);
  11         26  
  11         91  
13 11     11   65741 use Scalar::Util qw( reftype );
  11         35  
  11         871  
14 11     11   631 use File::Spec::Functions qw(catpath splitpath);
  11         846  
  11         1556  
15              
16             has [qw( seq file_name )] => (
17             is => 'ro',
18             required => 1,
19             );
20              
21             has _mins => (
22             is => 'ro',
23             required => 1,
24             init_arg => 'mins'
25             );
26              
27             has $_ => (
28             is => 'ro',
29             predicate => "has_$_",
30             ) foreach qw(_volume _directory);
31              
32              
33 11     11   90 use overload q{""} => 'stringify';
  11         24  
  11         102  
34              
35             # Allow various calling styles of the constructor:
36             # new(barriers_handle): pass only file handle to read data from, file_name
37             # will be undef.
38             # new(barriers_handle, barriers_file_path): read data from handle and set
39             # file name from a given path
40             # new( barriers_file_path): open handle and read data from the passed
41             # path, set file name accordingly
42             # new(hash_ref): manual construction from required attributes
43             around BUILDARGS => sub {
44             my $orig = shift;
45             my $class = shift;
46              
47             return $class->$orig(@_) if @_ == 0 or @_ > 2; # no special handling
48              
49             # Read Barriers file from handle and set constructor arguments.
50             my $barriers_fh;
51             my @args;
52             if (@_ == 1 and not reftype $_[0]) { # file path given
53             my $file_path = shift;
54             my ($volume, $directory, $file_name) = splitpath $file_path;
55             push @args, (
56             _volume => $volume,
57             _directory => $directory,
58             file_name => $file_name,
59             );
60              
61             open $barriers_fh, '<', $file_path;
62             }
63             elsif (reftype $_[0] eq reftype \*STDIN) { # file handle given
64             if (@_ == 2) {
65             # Check if second arg may be a file name
66             return $class->$orig(@_) if reftype $_[1]; # it's no file name
67             push @args, (file_name => $_[1]);
68             }
69             else {
70             push @args, (file_name => undef); # no file name given
71             }
72             $barriers_fh = shift;
73             }
74             else {
75             return $class->$orig(@_);
76             }
77              
78             # Parse data from handle and create arguments.
79             push @args, $class->_read_barriers_file($barriers_fh);
80             return $class->$orig(@args);
81             };
82              
83             sub _read_barriers_file {
84 28     28   95 my ($self, $barriers_fh) = @_;
85 28         60 my %args;
86              
87             # Read sequence from first line
88 28         63 my $sequence = do {
89 28         750 my $line = <$barriers_fh>;
90 28 50       167 confess 'Could not read sequence from Barriers file handle'
91             unless defined $line;
92 28         116 chomp $line;
93 28         278 $line =~ s/^\s*//r; # trim leading space
94             };
95 28         490 $args{seq} = $sequence;
96              
97             # Continue reading minima
98 28         73 my ($father, @mins);
99 28         194 while (defined (my $line = <$barriers_fh>)) {
100 566         17811 my $min = Bio::RNA::Barriers::Minimum->new($line);
101 563         3649 push @mins, $min;
102             }
103              
104 25 50       128 confess 'No minima found in Barriers file'
105             unless @mins;
106              
107             # Set references to father mins (if any). This needs to be done after
108             # minima construction because, if several minima share the same
109             # energy, a father may have a higher index than the doughter min (and
110             # thus not have been parsed when parsing the doughter).
111 25         87 foreach my $min (@mins) {
112 540 100       1520 if ($min->has_father) {
113 514         15236 my $father = $mins[$min->father_index - 1];
114 514 50 33     15631 confess 'Inconsistent father indexing in Barriers file'
115             unless ref $father
116             and $father->index == $min->father_index;
117 514         15143 $min->father($father);
118             }
119             }
120              
121 25         111 $args{mins} = \@mins;
122              
123 25         283 return %args;
124             }
125              
126             # Dereferenced list of all minima. Select individual minima using
127             # get_min[s]().
128             sub mins {
129 397     397 1 796 my $self = shift;
130 397         540 return @{ $self->_mins };
  397         14627  
131             }
132              
133             sub get_mins {
134 364     364 1 772 my ($self, @min_indices) = @_;
135              
136 364         749 foreach my $min_index (@min_indices) {
137 364 50 33     1193 confess "There is no minimum $min_index in this file"
138             if $min_index < 1 or $min_index > $self->mins;
139 364         842 $min_index--; # Perl is 0-based, barriers 1-based
140             }
141              
142 364         622 my @mins = @{$self->_mins}[@min_indices]; # array slice
  364         11113  
143 364         987 return @mins;
144             }
145              
146             sub get_min {
147 364     364 1 94828 my ($self, $min_index) = @_;
148 364         825 my ($min) = $self->get_mins($min_index);
149 364         4572 return $min;
150             }
151              
152             sub get_global_min {
153 0     0 1 0 my ($self) = @_;
154 0         0 my $mfe_min = $self->get_min(1); # basin 1 == mfe basin
155 0         0 return $mfe_min;
156             }
157              
158             sub min_count {
159 9     9 1 8979 my $self = shift;
160 9         39 my $min_count = $self->mins;
161 9         50 return $min_count;
162             }
163              
164             # List of all minima connected to the mfe minimum (min 1).
165             sub connected_mins {
166 0     0 1 0 my $self = shift;
167 0         0 my @connected_mins = grep {$_->is_connected} $self->mins;
  0         0  
168 0         0 return @connected_mins;
169             }
170              
171             # List of indices of all connected minima (cf. connected_mins()).
172             sub connected_indices {
173 0     0 1 0 my $self = shift;
174 0         0 my @connected_indices = map {$_->index} $self->connected_mins;
  0         0  
175 0         0 return @connected_indices;
176             }
177              
178             # Re-index minima after deleting some of them.
179             sub _reindex {
180 0     0   0 my $self = shift;
181              
182 0         0 my $i = 1;
183 0         0 my @mins = $self->mins;
184              
185             # Update min indices.
186 0         0 $_->index($i++) foreach @mins;
187              
188             # Update father indices.
189 0         0 shift @mins; # min 1 is orphan
190 0         0 $_->father_index($_->father->index) foreach @mins;
191              
192 0         0 return;
193             }
194              
195             # Keep only connected minima and remove all others. The indices are
196             # remapped to 1..k for k kept minima.
197             # Returns (old) indices of all kept minima.
198             sub keep_connected {
199 0     0 1 0 my $self = shift;
200              
201 0         0 my @connected_indices = $self->connected_indices;
202 0         0 my @connected_mins = $self->connected_mins;
203              
204             # Update minima list
205 0         0 @{ $self->_mins } = @connected_mins;
  0         0  
206 0         0 $self->_reindex;
207              
208 0         0 return @connected_indices;
209             }
210              
211             # Given an ordered list of indices of all connected minima (as returned by
212             # RateMatrix::keep_connected()), delete all other minima and update their
213             # ancesters' basin size information accordingly.
214             # Arguments:
215             # ordered_connected_indices: ordered list of indices of (all???)
216             # connected minima.
217             sub update_connected {
218 0     0 1 0 my ($self, @ordered_connected_indices) = @_;
219              
220             # Go through all mins and check whether they're next in the connected
221             # (==kept) index list. If not, add to removal list.
222 0         0 my @connected_mins = $self->get_mins(@ordered_connected_indices);
223 0         0 my @removed_indices;
224 0         0 for my $min_index (1..$self->min_count) {
225 0 0       0 unless (@ordered_connected_indices) {
226             # No exclusions left, add rest and stop
227 0         0 push @removed_indices, $min_index..$self->min_count;
228 0         0 last;
229             }
230 0 0       0 if ($min_index == $ordered_connected_indices[0]) {
231 0         0 shift @ordered_connected_indices;
232 0         0 next;
233             }
234 0         0 push @removed_indices, $min_index; # min is deleted
235             }
236              
237 0         0 my @removed_mins = $self->get_mins(@removed_indices);
238 0         0 $self->_update_ancestors(@removed_mins);
239 0         0 @{ $self->_mins } = @connected_mins;
  0         0  
240 0         0 $self->_reindex;
241              
242 0         0 return;
243             }
244              
245             # Pass a list of ORDERED deleted minima and update their ancestors' bsize
246             # attributes.
247             sub _update_ancestors {
248 0     0   0 my ($self, @removed_mins) = @_;
249              
250             # If the bsize attributes are present, update the basin energy of all
251             # (grand)* father basins (substract energy of this one).
252             # The minima need to be processed in reversed order because, if an
253             # ancester of a removed min is also removed, its merged basin energy
254             # includes the basin energy of its child, and thus this contribution
255             # would be substracted multiple times from older ancesters. In
256             # reversed order, the child contribution is first substracted from the
257             # ancestors, and then the contribution of the removed ancestors does
258             # not include the child anymore.
259              
260 0 0       0 return unless $self->has_bsize;
261              
262 0         0 foreach my $removed_min (reverse @removed_mins) {
263 0         0 foreach my $ancestor_min ($removed_min->ancestors) {
264 0         0 $ancestor_min->_merged_basin_energy( # private writer
265             $ancestor_min->merged_basin_energy
266             - $removed_min->grad_basin_energy
267             );
268             }
269             }
270              
271 0         0 return;
272             }
273              
274             # Keep only the first k mins. If there are only k or less mins, do
275             # nothing.
276             # WARNING: THIS CAN DISCONNECT THE LANDSCAPE! The bar file will still look
277             # connected, however, modifying the rate matrix accordingly can lead to
278             # non-ergodicity (e.g. when basin 3 merged to 2, 2/3 merged to 1 because
279             # of a possible transition from 3 to 1, and basin 3 is then removed).
280             # To cope with that, call RateMatrix::keep_connected() and, with its
281             # return value, Results::update_connected() again.
282             # Arguments:
283             # max_min_count: number of minima to keep.
284             # Returns a list of all removed mins (may be empty).
285             sub keep_first_mins {
286 0     0 1 0 my ($self, $max_min_count) = @_;
287              
288 0         0 my @removed_mins = splice @{ $self->_mins }, $max_min_count;
  0         0  
289 0         0 $self->_update_ancestors(@removed_mins);
290              
291              
292 0         0 return @removed_mins;
293             }
294              
295              
296             # Check whether the stored minima contain information about the basin
297             # sizes as computed by Barriers' --bsize switch. Checks only the mfe
298             # basin (since all or neither min should have this information).
299             sub has_bsize {
300 0     0 1 0 my ($self) = @_;
301 0         0 my $has_bsize = $self->get_global_min->has_bsize;
302 0         0 return $has_bsize;
303             }
304              
305              
306             # Construct the file path to this Barriers file. Works only if it was
307             # actually parsed from a file (of course...).
308             # Returns the path as a string.
309             sub path {
310 0     0 1 0 my $self = shift;
311 0 0 0     0 confess 'Cannot construct path, missing volume, directory or file name'
      0        
312             unless defined $self->file_name
313             and $self->has_volume
314             and $self->has_directory
315             ;
316 0         0 my ($volume, $dir, $file_name)
317             = ($self->_volume, $self->_directory, $self->file_name);
318 0         0 my $path = catpath($volume, $dir, $file_name);
319 0         0 return $path;
320             }
321              
322              
323             # Convert back to Barriers file.
324             sub stringify {
325 12     12 1 3007 my $self = shift;
326              
327 12         473 my $header = (q{ } x 5) . $self->seq;
328 12         55 my @lines = ($header, map { $_->stringify } $self->mins);
  264         790  
329              
330 12         408 return join "\n", @lines;
331             }
332              
333             __PACKAGE__->meta->make_immutable;
334              
335             1; # End of Bio::RNA::Barriers::Results
336              
337              
338             __END__
339              
340             =pod
341              
342             =encoding UTF-8
343              
344             =head1 NAME
345              
346             Bio::RNA::Barriers::Results - Parse, query and manipulate results
347             of a I<Barriers> run
348              
349             =head1 SYNOPSIS
350              
351             use Bio::RNA::Barriers;
352              
353             # Read in a Barriers output file.
354             open my $barriers_handle, '<', $barriers_file;
355             my $bardat = Bio::RNA::Barriers::Results->new($barriers_handle);
356             # Or, even simpler, pass file name directly:
357             $bardat = Bio::RNA::Barriers::Results->new($barriers_file );
358              
359             # Print some info
360             print "There are ", $bardat->min_count, " minima.";
361             my $min3 = $bardat->get_min(3);
362             print $min3->grad_struct_count,
363             " structures lead to basin 3 via a gradient walk.\n"
364             if $min3->has_bsize;
365             print "Min ", $min3->index, " is ", ($min3->is_connected ? "" : "NOT"),
366             " connected to the mfe structure.\n";
367              
368             # Print the mfe basin line as in the results file
369             my $mfe_min = $bardat->get_global_min();
370             print "$mfe_min\n";
371              
372              
373             =head1 DESCRIPTION
374              
375             This is what you usually want to use. Pass a file name or a handle to the
376             constructor and you're golden. When querying a specific minimum by index, a
377             L<Bio::RNA::Barriers::Minimum> object is returned. For more details on its
378             methods, please refer to its documentation.
379              
380             =head1 METHODS
381              
382             =head3 Bio::RNA::Barriers::Results->new()
383              
384             Constructs the results object from a Barriers results file.
385              
386             =over 4
387              
388             =item Supported argument list types:
389              
390             =over 4
391              
392             =item new($barriers_file_path):
393              
394             Open handle and read data from the passed path, set file name accordingly.
395              
396             =item new($barriers_handle):
397              
398             A file handle to read data from, C<file_name()> will return
399             undef.
400              
401             =item new($barriers_handle, $barriers_file_path):
402              
403             Read data from handle and set file name from a given path.
404              
405             =item new($hash_ref):
406              
407             Manual construction from required attributes. Only for experts.
408              
409             =back
410              
411             =back
412              
413             =head3 $res->seq()
414              
415             Returns the RNA sequence for which the minima have been computed.
416              
417             =head3 $res->file_name()
418              
419             Name of the file from which the minima have been read. May be C<undef>.
420              
421             =head3 $res->mins()
422              
423             Returns a list of all minima. Useful for iteration in C<for> loops.
424              
425             =head3 $res->get_min($index)
426              
427             Return the single minimum given by a 1-based C<$index> (as used in the results
428             file).
429              
430             =head3 $res->get_mins(@indices)
431              
432             Return a list of minima given by a 1-based list of C<@indices> (as used in the
433             results file).
434              
435             =head3 $res->get_global_min()
436              
437             Returns the basin represented by the (global) mfe structure (i.e. basin 1).
438              
439             =head3 $res->min_count()
440              
441             Returns the total number of basins.
442              
443             =head3 $res->connected_mins()
444              
445             Returns a list of all minima connected to the mfe minimum (min 1).
446              
447             =head3 $res->connected_indices()
448              
449             Returns a list of indices of all connected minima (cf. C<connected_mins()>).
450              
451             =head3 $res->keep_connected()
452              
453             Keep only connected minima and remove all others. The indices are remapped to
454             1..k for k kept minima. Returns (old) indices of all kept minima.
455              
456             =head3 $res->update_connected(@connected_mins)
457              
458             Given an ordered list of indices of all connected minima (as returned by
459             C<RateMatrix::keep_connected()>), delete all other minima and update their
460             ancesters' basin size information accordingly.
461              
462             =head3 $res->keep_first_mins($min_count)
463              
464             Keep only the first C<$min_count> mins. (If there are only that many or less
465             minima in total, do nothing.)
466              
467             WARNING: THIS CAN DISCONNECT THE LANDSCAPE! The bar file will still look
468             connected, however, modifying the rate matrix accordingly can lead to
469             non-ergodicity (e.g. when basin 3 merged to 2, 2/3 merged to 1 because
470             of a possible transition from 3 to 1, and basin 3 is then removed). Returns a
471             list of all removed mins (may be empty).
472              
473             =head3 $res->has_bsize()
474              
475             Check whether the stored minima contain information about the basin sizes as
476             computed by Barriers' --bsize switch. Checks only the mfe basin (since all or
477             neither min should have this information).
478              
479             =head3 $res->path()
480              
481             Construct the file path to this Barriers file. Works only if it was actually
482             parsed from a file (of course...). Returns the path as a string.
483              
484             =head3 $res->stringify()
485              
486             Convert back to Barriers file. Also supports stringification overloading, i.
487             e. C<"$res"> is equivalent to C<$res-E<gt>stringify()>.
488              
489              
490             =head1 AUTHOR
491              
492             Felix Kuehnl, C<< <felix at bioinf.uni-leipzig.de> >>
493              
494             =head1 BUGS
495              
496             Please report any bugs or feature requests by raising an issue at
497             L<https://github.com/xileF1337/Bio-RNA-Barriers/issues>.
498              
499             You can also do so by mailing to C<bug-bio-rna-barmap at rt.cpan.org>,
500             or through the web interface at
501             L<https://rt.cpan.org/NoAuth/ReportBug.html?Queue=Bio-RNA-BarMap>. I will be
502             notified, and then you'll automatically be notified of progress on your bug as
503             I make changes.
504              
505              
506             =head1 SUPPORT
507              
508             You can find documentation for this module with the perldoc command.
509              
510             perldoc Bio::RNA::Barriers
511              
512              
513             You can also look for information at the official Barriers website:
514              
515             L<https://www.tbi.univie.ac.at/RNA/Barriers/>
516              
517              
518             =over 4
519              
520             =item * Github: the official repository
521              
522             L<https://github.com/xileF1337/Bio-RNA-Barriers>
523              
524             =item * RT: CPAN's request tracker (report bugs here)
525              
526             L<https://rt.cpan.org/NoAuth/Bugs.html?Dist=Bio-RNA-Barriers>
527              
528             =item * AnnoCPAN: Annotated CPAN documentation
529              
530             L<http://annocpan.org/dist/Bio-RNA-Barriers>
531              
532             =item * CPAN Ratings
533              
534             L<https://cpanratings.perl.org/d/Bio-RNA-Barriers>
535              
536             =item * Search CPAN
537              
538             L<https://metacpan.org/release/Bio-RNA-Barriers>
539              
540             =back
541              
542              
543             =head1 LICENSE AND COPYRIGHT
544              
545             Copyright 2019-2021 Felix Kuehnl.
546              
547             This program is free software: you can redistribute it and/or modify
548             it under the terms of the GNU General Public License as published by
549             the Free Software Foundation, either version 3 of the License, or
550             (at your option) any later version.
551              
552             This program is distributed in the hope that it will be useful,
553             but WITHOUT ANY WARRANTY; without even the implied warranty of
554             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
555             GNU General Public License for more details.
556              
557             You should have received a copy of the GNU General Public License
558             along with this program. If not, see L<http://www.gnu.org/licenses/>.
559              
560              
561              
562             =cut
563              
564              
565             # End of lib/Bio/RNA/Barriers/Results.pm