File Coverage

blib/lib/Chemistry/Ring/Find.pm
Criterion Covered Total %
statement 125 136 91.9
branch 43 64 67.1
condition 20 29 68.9
subroutine 9 10 90.0
pod 2 5 40.0
total 199 244 81.5


line stmt bran cond sub pod time code
1             package Chemistry::Ring::Find;
2              
3             our $VERSION = '0.21'; # VERSION
4             # $Id$
5              
6             =head1 NAME
7              
8             Chemistry::Ring::Find - Find the rings (cycles) in a molecule
9              
10             =head1 SYNOPSIS
11              
12             use Chemistry::Ring::Find ':all';
13              
14             # find the smallest ring containing $atom
15             my $ring = find_ring($atom);
16              
17             # find all the rings containing $bond
18             my @rings = find_ring($bond, all => 1);
19              
20             # see below for more options
21              
22             # find the six 4-atom rings in cubane
23             @rings = find_rings($cubane);
24              
25             # find a cubane SSSR with five rings
26             @rings = find_rings($cubane, sssr => 1);
27              
28             =head1 DESCRIPTION
29              
30             The Chemistry::Ring::Find module implements a breadth-first ring finding
31             algorithm, and it can find all rings that contain a given atom or bond, the
32             Smallest Set of Smallest Rings (SSSR), or the "almost SSSR", which is an
33             unambiguous set of rings for cases such as cubane.The algorithms are based on
34             ideas from:
35              
36             1) Leach, A. R.; Dolata, D. P.; Prout, P. Automated Conformational Analysis and
37             Structure Generation: Algorithms for Molecular Perception J. Chem. Inf. Comput.
38             Sci. 1990, 30, 316-324
39              
40             2) Figueras, J. Ring perception using breadth-first search. J. Chem. Inf.
41             Comput. Sci. 1996, 36, 986-991.
42              
43             Ref. 2 is only used for find_ring, not for find_rings, because it has been
44             shown that the overall SSSR method in ref 2 has bugs. Ref 1 inspired
45             find_rings, which depends on find_ring.
46              
47             This module is part of the PerlMol project, L.
48              
49             =head1 FUNCTIONS
50              
51             These functions may be exported explicitly, or all by using the :all tag, but
52             nothing is exported by default.
53              
54             =over
55              
56             =cut
57              
58              
59 3     3   222457 use strict;
  3         17  
  3         91  
60 3     3   32 use warnings;
  3         13  
  3         108  
61 3     3   19 no warnings qw(recursion);
  3         7  
  3         101  
62 3     3   895 use Chemistry::Ring;
  3         7  
  3         341  
63              
64             our @ISA = qw(Exporter);
65             our @EXPORT_OK = qw(find_ring find_rings);
66             our %EXPORT_TAGS = ( all => \@EXPORT_OK );
67              
68             our $DEBUG = 0;
69              
70             =item find_ring($origin, %opts)
71              
72             Find the smallest ring containing $origin, which may be either an atom or a bond.
73             Returns a Chemistry::Ring object. Options:
74              
75             =over
76              
77             =item all
78              
79             If true, find all the rings containing $origin. If false, return the first ring
80             found. Defaults to false. "All" is supposed to include only "simple" rings,
81             that is, rings that are not a combination of smaller rings.
82              
83             =item min
84              
85             Only find rings with a the given minimum size. Defaults to zero.
86              
87             =item max
88              
89             Only find rings up to the given maximium size. Defaults to unlimited size.
90              
91             =item size
92              
93             Only find rings with this size. Same as setting min and max to the same size.
94             Default: unspecified.
95              
96             =item exclude
97              
98             An array reference containing a list of atoms that must NOT be present in the
99             ring. Defaults to the empty list.
100              
101             =item mirror
102              
103             If true, find each ring twice (forwards and backwards). Defaults to false.
104              
105             =back
106              
107             =cut
108              
109             # $origin is an atom
110             # options: min, max, size, all, mirror, exclude
111             sub find_ring {
112 3     3   21 no warnings qw(uninitialized);
  3         8  
  3         4160  
113 29     29 1 72385 my ($origin, %opts) = @_;
114 29   50     159 my $min_size = $opts{min} || $opts{size} || 0;
115 29   33     95 my $max_size = $opts{max} || $opts{size};
116 29         144 my %paths;
117             my %bond_paths;
118 29         0 my @q;
119 29         0 my @rings;
120 29         0 my %used_end_nodes;
121 29         0 my $required_bond;
122 29         0 my %exclude;
123 29 50       43 @exclude{ @{$opts{exclude} || []} } = ();
  29         145  
124              
125 29 100       167 if ($origin->isa("Chemistry::Bond")) {
126 18         31 $required_bond = $origin;
127 18         54 ($origin) = $origin->atoms;
128             }
129 29         177 @q = ($origin);
130 29         80 $paths{$origin} = [$origin];
131 29         256 $bond_paths{$origin} = [];
132             # $path{$atom} means how to get to $atom from $origin
133              
134 29         196 my $a;
135 29         80 while ($a = shift @q) {
136 178         1030 my $from = $paths{$a}[-2];
137 178 50       1312 print "at $a from $from\n" if $DEBUG;
138 178         389 for my $bn ($a->bonds_neighbors($from)) {
139 305         4844 my $nei = $bn->{to};
140 305         495 my $bond = $bn->{bond};
141 305 50       576 next if exists $exclude{$nei};
142 305 50       2675 print " -> $nei\n" if $DEBUG;
143 305 100       493 if ($paths{$nei}) {
144             # a hypothetical check_collision() would have to do this:
145             # %paths, $min_size, $max_size, $used_end_nodes
146             # %bond_paths, $required_bond, @rings
147             # check_size
148             # check_redundant
149             # check_required_bond
150             # check_contains_ring
151             # push
152              
153 149 50       1101 print "found a path collision... " if $DEBUG;
154             # check to make sure that the ring really started at $origin
155             # and the size is what was requested
156 149         176 my $size = @{$paths{$nei}} + @{$paths{$a}} - 1;
  149         290  
  149         1082  
157             #if($paths{$nei}[1] != $paths{$a}[1]
158 149 100 100     1092 if($paths{$nei}[1] ne $paths{$a}[1]
      33        
      66        
159             and $size >= $min_size
160             and !$max_size || $size <= $max_size)
161             {
162 123 50       2288 print "VALID\n" if $DEBUG;
163 123         160 my @atoms = (@{$paths{$a}}, reverse @{$paths{$nei}});
  123         231  
  123         914  
164 123 50       1036 print "RING = ", print_path(\@atoms) if $DEBUG;
165 123         169 pop @atoms;
166 123 100 100     283 if ($used_end_nodes{$atoms[1]} and !$opts{mirror}) {
167 65 50       575 print "skipping redundant ring\n" if $DEBUG;
168 65         218 next; # don't want to find rings twice
169             }
170 58         114 my @bonds = (@{$bond_paths{$a}}, $bond,
171 58         452 reverse @{$bond_paths{$nei}});
  58         416  
172 58 100 100     536 if ($required_bond
173 180         1007 and not grep {$_ eq $required_bond} @bonds) {
174 15 50       140 print "does not include required bond ("
175             . join(" ", $required_bond->atoms)
176             . ")\n" if $DEBUG;
177 15         55 next;
178             }
179 43 100       261 if (contains_ring(\@atoms, \@rings)) {
180 1 50       5 print "contains another ring\n" if $DEBUG;
181 1         5 next;
182             }
183 42         168 my $r = Chemistry::Ring->new;
184 42         358 $r->add_atom(@atoms);
185 42         1733 $r->add_bond(@bonds);
186 42 100       1551 return $r unless $opts{all}; # FOUND VALID RING
187 35         60 push @rings, $r;
188 35         80 $used_end_nodes{$atoms[-1]} = 1;
189             #@used_nodes{@atoms} = ();
190             } else {
191             print "NOT VALID",
192 0         0 print_path( [@{$paths{$a}},
193 26 50       458 reverse @{$paths{$nei->id}}]) if $DEBUG;
  0         0  
194             }
195             } else {
196 156 50 33     1253 if (!$max_size || @{$paths{$a}} < ($max_size / 2) + 0.1) {
  0         0  
197 156         230 push @q, $nei;
198 156 50       255 print " pushing path\n" if $DEBUG;
199 156         205 $paths{$nei} = [@{$paths{$a}}, $nei];
  156         279  
200 156         1922 $bond_paths{$nei} = [@{$bond_paths{$a}}, $bond];
  156         277  
201 156 50       2538 print print_path($paths{$nei}) if $DEBUG;
202             } else {
203 0 0       0 print "path too long; " if $DEBUG;
204 0 0       0 print print_path($paths{$a}) if $DEBUG;
205             #path too long
206             }
207             }
208             }
209             }
210 22         161 @rings;
211             }
212              
213              
214             sub print_path {
215 0     0 0 0 my $p = shift;
216 0         0 my $ret = " PATH: ";
217 0         0 for my $a (@$p) {
218 0         0 $ret .= "$a - ";
219             }
220 0         0 $ret .= "\n";
221             }
222              
223             # contains_ring($atoms, $rings)
224             # returns true if one of the rings in the array ref $rings is a proper subset
225             # of the atom list in the array ref $atom.
226             sub contains_ring {
227 43     43 0 70 my ($atoms, $rings) = @_;
228 43         65 my %seen;
229 43         91 @seen{@$atoms} = ();
230 43         1047 for my $ring (@$rings) {
231 14         51 my $unique_atoms = $ring->atoms;
232 14 100       121 next if $unique_atoms >= @$atoms; # ring is same size or bigger
233             # make sure that $ring has at least one atom not in $atoms
234 1         4 for my $atom ($ring->atoms) {
235 3 50       15 if (exists $seen{$atom}) {
236 3         28 $unique_atoms--;
237             } else {
238 0         0 last; # there's at least one unique atom!
239             }
240             }
241 1 50       5 return 1 unless $unique_atoms;
242             }
243 42         118 0;
244             }
245              
246             =item @rings = find_rings($mol, %options)
247              
248             Find "all" the rings in the molecule. In general it return the Smallest Set of
249             Smallest Rings (SSSR). However, since it is well known that the SSSR is not
250             unique for molecules such as cubane (where the SSSR consists of five
251             unspecified four-member rings, even if the symmetry of the molecule would
252             suggest that the six faces of the cube are equivalent), in such cases
253             find_rings will return a non-ambiguous "non-smallest" set of smallest rings,
254             unless the "sssr" option is given. For example,
255              
256             @rings = find_rings($cubane);
257             # returns SIX four-member rings
258              
259             @rings = find_rings($cubane, sssr => 1);
260             # returns FIVE four-member rings (an unspecified subset of
261             # the six rings above.)
262              
263             =cut
264              
265             sub find_rings {
266 6     6 1 59064 my ($mol, %opts) = @_;
267 6         13 my $visited = {};
268 6         11 my @ring_bonds;
269 6         18 for my $atom ($mol->atoms) {
270 42 100       283 next if $visited->{$atom};
271 7         78 push @ring_bonds, find_ring_bonds($mol, \%opts, $atom, $visited);
272             }
273             #print "rb($_; @{$_->{atoms}})\n" for @ring_bonds;
274 6         49 my @rings;
275 6         18 my $n_rings = @ring_bonds;
276             #print "cyclomatic number=$n_rings\n";
277 6         14 for my $ring_bond (@ring_bonds) {
278 16         36 push @rings, find_ring($ring_bond, all => 1)
279             }
280 6         9 my %seen;
281 6         18 my @ring_keys = map { join " ", sort $_->atoms } @rings;
  26         888  
282             #print "key($_)\n" for @ring_keys;
283 6         289 @seen{@ring_keys} = @rings;
284 6         26 @rings = sort { $a->atoms <=> $b->atoms } values %seen;
  20         179  
285 6 100 100     99 if (!$opts{sssr} and @rings > $n_rings) {
286 1 50       4 $n_rings++ if $rings[$n_rings]->atoms == $rings[$n_rings-1]->atoms;
287             }
288 6 100       32 splice @rings, $n_rings if defined $rings[$n_rings];
289             #splice @rings, $n_rings;
290 6         37 @rings;
291             }
292              
293             # find a set of "ring closure bonds" by doing a depth-first search
294             # it will find a number of bonds equal to the cyclomatic number
295             sub find_ring_bonds {
296 42     42 0 78 my ($mol, $opts, $atom, $visited) = @_;
297              
298 42         50 my @ring_bonds;
299 42         80 $visited->{$atom} = 1;
300 42         330 for my $bn ($atom->bonds_neighbors) {
301 102         1032 my $nei = $bn->{to};
302 102         170 my $bond = $bn->{bond};
303             #next if $visited->{$bond};
304 102 100 66     278 next if not defined $bond or $visited->{$bond};
305 51         424 $visited->{$bond} = 1;
306 51 100       381 if ($visited->{$nei}) { # closed ring
307             #print "closing ring\n";
308 16         124 push @ring_bonds, $bond;
309             } else {
310 35         306 push @ring_bonds,
311             find_ring_bonds($mol, $opts, $nei, $visited);
312             }
313             }
314 42         219 @ring_bonds;
315             }
316              
317             1;
318              
319             =back
320              
321             =head1 BUGS
322              
323             The "all" option in find_ring doesn't quite work as expected. It finds all
324             simple rings and some bridged rings. It never finds fused rings (which is good).
325              
326             =head1 SOURCE CODE REPOSITORY
327              
328             L
329              
330             =head1 SEE ALSO
331              
332             L
333              
334             =head1 AUTHOR
335              
336             Ivan Tubert-Brohman Eitub@cpan.orgE
337              
338             =head1 COPYRIGHT
339              
340             Copyright (c) 2009 Ivan Tubert-Brohman. All rights reserved. This program is
341             free software; you can redistribute it and/or modify it under the same terms as
342             Perl itself.
343              
344             =cut