File Coverage

blib/lib/Chemistry/Ring/Find.pm
Criterion Covered Total %
statement 120 136 88.2
branch 43 64 67.1
condition 21 29 72.4
subroutine 9 10 90.0
pod 2 5 40.0
total 195 244 79.9


line stmt bran cond sub pod time code
1             package Chemistry::Ring::Find;
2              
3             $VERSION = 0.20;
4             # $Id: Find.pm,v 1.2 2009/05/10 21:12:44 itubert Exp $
5              
6             =head1 NAME
7              
8             Chemistry::Ring::Find - Find the rings (cycles) in a molecule
9              
10             =head1 SYNOPSIS
11              
12             use Chemistry::Ring::Find ':all';
13              
14             # find the smallest ring containing $atom
15             my $ring = find_ring($atom);
16              
17             # find all the rings containing $bond
18             my @rings = find_ring($bond, all => 1);
19              
20             # see below for more options
21              
22             # find the six 4-atom rings in cubane
23             @rings = find_rings($cubane);
24              
25             # find a cubane SSSR with five rings
26             @rings = find_rings($cubane, sssr => 1);
27              
28             =head1 DESCRIPTION
29              
30             The Chemistry::Ring::Find module implements a breadth-first ring finding
31             algorithm, and it can find all rings that contain a given atom or bond, the
32             Smallest Set of Smallest Rings (SSSR), or the "almost SSSR", which is an
33             unambiguous set of rings for cases such as cubane.The algorithms are based on
34             ideas from:
35              
36             1) Leach, A. R.; Dolata, D. P.; Prout, P. Automated Conformational Analysis and
37             Structure Generation: Algorithms for Molecular Perception J. Chem. Inf. Comput.
38             Sci. 1990, 30, 316-324
39              
40             2) Figueras, J. Ring perception using breadth-first search. J. Chem. Inf.
41             Comput. Sci. 1996, 36, 986-991.
42              
43             Ref. 2 is only used for find_ring, not for find_rings, because it has been
44             shown that the overall SSSR method in ref 2 has bugs. Ref 1 inspired
45             find_rings, which depends on find_ring.
46              
47             This module is part of the PerlMol project, L.
48              
49             =head1 FUNCTIONS
50              
51             These functions may be exported explicitly, or all by using the :all tag, but
52             nothing is exported by default.
53              
54             =over
55              
56             =cut
57              
58              
59 3     3   201032 use strict;
  3         8  
  3         126  
60 3     3   17 use warnings;
  3         6  
  3         103  
61 3     3   16 no warnings qw(recursion);
  3         10  
  3         105  
62 3     3   1080 use Chemistry::Ring;
  3         8  
  3         434  
63              
64             our @ISA = qw(Exporter);
65             our @EXPORT_OK = qw(find_ring find_rings);
66             our %EXPORT_TAGS = ( all => \@EXPORT_OK );
67              
68             our $DEBUG = 0;
69              
70             =item find_ring($origin, %opts)
71              
72             Find the smallest ring containg $origin, which may be either an atom or a bond.
73             Returns a Chemistry::Ring object. Options:
74              
75             =over
76              
77             =item all
78              
79             If true, find all the rings containing $origin. If false, return the first ring
80             found. Defaults to false. "All" is supposed to include only "simple" rings,
81             that is, rings that are not a combination of smaller rings.
82              
83             =item min
84              
85             Only find rings with a the given minimum size. Defaults to zero.
86              
87             =item max
88              
89             Only find rings up to the given maximium size. Defaults to unlimited size.
90              
91             =item size
92              
93             Only find rings with this size. Same as setting min and max to the same size.
94             Default: unspecified.
95              
96             =item exclude
97              
98             An array reference containing a list of atoms that must NOT be present in the
99             ring. Defaults to the empty list.
100              
101             =item mirror
102              
103             If true, find each ring twice (forwards and backwards). Defaults to false.
104              
105             =back
106              
107             =cut
108              
109             # $origin is an atom
110             # options: min, max, size, all, mirror, exclude
111             sub find_ring {
112 3     3   21 no warnings qw(uninitialized);
  3         6  
  3         5476  
113 29     29 1 92703 my ($origin, %opts) = @_;
114 29   100     237 my $min_size = $opts{min} || $opts{size} || 0;
115 29   33     127 my $max_size = $opts{max} || $opts{size};
116 29         61 my %paths;
117             my %bond_paths;
118 0         0 my @q;
119 0         0 my @rings;
120 0         0 my %used_end_nodes;
121 0         0 my $required_bond;
122 0         0 my %exclude;
123 29 50       45 @exclude{ @{$opts{exclude} || []} } = ();
  29         153  
124              
125 29 100       212 if ($origin->isa("Chemistry::Bond")) {
126 18         26 $required_bond = $origin;
127 18         71 ($origin) = $origin->atoms;
128             }
129 29         187 @q = ($origin);
130 29         111 $paths{$origin} = [$origin];
131 29         269 $bond_paths{$origin} = [];
132             # $path{$atom} means how to get to $atom from $origin
133              
134 29         208 my $a;
135 29         102 while ($a = shift @q) {
136 178         1269 my $from = $paths{$a}[-2];
137 178 50       1433 print "at $a from $from\n" if $DEBUG;
138 178         508 for my $bn ($a->bonds_neighbors($from)) {
139 305         5759 my $nei = $bn->{to};
140 305         428 my $bond = $bn->{bond};
141 305 50       751 next if exists $exclude{$nei};
142 305 50       2582 print " -> $nei\n" if $DEBUG;
143 305 100       749 if ($paths{$nei}) {
144             # a hypothetical check_collision() would have to do this:
145             # %paths, $min_size, $max_size, $used_end_nodes
146             # %bond_paths, $required_bond, @rings
147             # check_size
148             # check_redundant
149             # check_required_bond
150             # check_contains_ring
151             # push
152              
153 149 50       1244 print "found a path collision... " if $DEBUG;
154             # check to make sure that the ring really started at $origin
155             # and the size is what was requested
156 149         164 my $size = @{$paths{$nei}} + @{$paths{$a}} - 1;
  149         334  
  149         1176  
157             #if($paths{$nei}[1] != $paths{$a}[1]
158 149 100 100     1280 if($paths{$nei}[1] ne $paths{$a}[1]
      33        
      66        
159             and $size >= $min_size
160             and !$max_size || $size <= $max_size)
161             {
162 123 50       2444 print "VALID\n" if $DEBUG;
163 123         132 my @atoms = (@{$paths{$a}}, reverse @{$paths{$nei}});
  123         298  
  123         948  
164 123 50       1075 print "RING = ", print_path(\@atoms) if $DEBUG;
165 123         183 pop @atoms;
166 123 100 100     306 if ($used_end_nodes{$atoms[1]} and !$opts{mirror}) {
167 65 50       661 print "skipping redundant ring\n" if $DEBUG;
168 65         331 next; # don't want to find rings twice
169             }
170 58         133 my @bonds = (@{$bond_paths{$a}}, $bond,
  58         430  
171 58         455 reverse @{$bond_paths{$nei}});
172 58 100 100     517 if ($required_bond
  180         1185  
173             and not grep {$_ eq $required_bond} @bonds) {
174 15 50       132 print "does not include required bond ("
175             . join(" ", $required_bond->atoms)
176             . ")\n" if $DEBUG;
177 15         63 next;
178             }
179 43 100       310 if (contains_ring(\@atoms, \@rings)) {
180 1 50       4 print "contains another ring\n" if $DEBUG;
181 1         8 next;
182             }
183 42         226 my $r = Chemistry::Ring->new;
184 42         480 $r->add_atom(@atoms);
185 42         2160 $r->add_bond(@bonds);
186 42 100       1767 return $r unless $opts{all}; # FOUND VALID RING
187 35         60 push @rings, $r;
188 35         96 $used_end_nodes{$atoms[-1]} = 1;
189             #@used_nodes{@atoms} = ();
190             } else {
191 0         0 print "NOT VALID",
192 0         0 print_path( [@{$paths{$a}},
193 26 50       532 reverse @{$paths{$nei->id}}]) if $DEBUG;
194             }
195             } else {
196 156 50 33     1284 if (!$max_size || @{$paths{$a}} < ($max_size / 2) + 0.1) {
  0         0  
197 156         230 push @q, $nei;
198 156 50       275 print " pushing path\n" if $DEBUG;
199 156         177 $paths{$nei} = [@{$paths{$a}}, $nei];
  156         356  
200 156         1987 $bond_paths{$nei} = [@{$bond_paths{$a}}, $bond];
  156         428  
201 156 50       2496 print print_path($paths{$nei}) if $DEBUG;
202             } else {
203 0 0       0 print "path too long; " if $DEBUG;
204 0 0       0 print print_path($paths{$a}) if $DEBUG;
205             #path too long
206             }
207             }
208             }
209             }
210 22         229 @rings;
211             }
212              
213              
214             sub print_path {
215 0     0 0 0 my $p = shift;
216 0         0 my $ret = " PATH: ";
217 0         0 for my $a (@$p) {
218 0         0 $ret .= "$a - ";
219             }
220 0         0 $ret .= "\n";
221             }
222              
223             # contains_ring($atoms, $rings)
224             # returns true if one of the rings in the array ref $rings is a proper subset
225             # of the atom list in the array ref $atom.
226             sub contains_ring {
227 43     43 0 56 my ($atoms, $rings) = @_;
228 43         49 my %seen;
229 43         159 @seen{@$atoms} = ();
230 43         997 for my $ring (@$rings) {
231 14         47 my $unique_atoms = $ring->atoms;
232 14 100       125 next if $unique_atoms >= @$atoms; # ring is same size or bigger
233             # make sure that $ring has at least one atom not in $atoms
234 1         4 for my $atom ($ring->atoms) {
235 3 50       15 if (exists $seen{$atom}) {
236 3         23 $unique_atoms--;
237             } else {
238 0         0 last; # there's at least one unique atom!
239             }
240             }
241 1 50       7 return 1 unless $unique_atoms;
242             }
243 42         149 0;
244             }
245              
246             =item @rings = find_rings($mol, %options)
247              
248             Find "all" the rings in the molecule. In general it return the Smallest Set of
249             Smallest Rings (SSSR). However, since it is well known that the SSSR is not
250             unique for molecules such as cubane (where the SSSR consists of five
251             unspecified four-member rings, even if the symmetry of the molecule would
252             suggest that the six faces of the cube are equivalent), in such cases
253             find_rings will return a non-ambiguous "non-smallest" set of smallest rings,
254             unless the "sssr" option is given. For example,
255              
256             @rings = find_rings($cubane);
257             # returns SIX four-member rings
258              
259             @rings = find_rings($cubane, sssr => 1);
260             # returns FIVE four-member rings (an unspecified subset of
261             # the six rings above.)
262              
263             =cut
264              
265             sub find_rings {
266 6     6 1 69017 my ($mol, %opts) = @_;
267 6         20 my $visited = {};
268 6         16 my @ring_bonds;
269 6         46 for my $atom ($mol->atoms) {
270 42 100       374 next if $visited->{$atom};
271 7         87 push @ring_bonds, find_ring_bonds($mol, \%opts, $atom, $visited);
272             }
273             #print "rb($_; @{$_->{atoms}})\n" for @ring_bonds;
274 6         54 my @rings;
275 6         13 my $n_rings = @ring_bonds;
276             #print "cyclomatic number=$n_rings\n";
277 6         16 for my $ring_bond (@ring_bonds) {
278 16         47 push @rings, find_ring($ring_bond, all => 1)
279             }
280 6         8 my %seen;
281 6         15 my @ring_keys = map { join " ", sort $_->atoms } @rings;
  26         856  
282             #print "key($_)\n" for @ring_keys;
283 6         263 @seen{@ring_keys} = @rings;
284 6         33 @rings = sort { $a->atoms <=> $b->atoms } values %seen;
  20         255  
285 6 100 100     114 if (!$opts{sssr} and @rings > $n_rings) {
286 1 50       5 $n_rings++ if $rings[$n_rings]->atoms == $rings[$n_rings-1]->atoms;
287             }
288 6 100       31 splice @rings, $n_rings if defined $rings[$n_rings];
289             #splice @rings, $n_rings;
290 6         61 @rings;
291             }
292              
293             # find a set of "ring closure bonds" by doing a depth-first search
294             # it will find a number of bonds equal to the cyclomatic number
295             sub find_ring_bonds {
296 42     42 0 69 my ($mol, $opts, $atom, $visited) = @_;
297              
298 42         82 my @ring_bonds;
299 42         107 $visited->{$atom} = 1;
300 42         362 for my $bn ($atom->bonds_neighbors) {
301 102         1149 my $nei = $bn->{to};
302 102         125 my $bond = $bn->{bond};
303             #next if $visited->{$bond};
304 102 100 66     384 next if not defined $bond or $visited->{$bond};
305 51         510 $visited->{$bond} = 1;
306 51 100       405 if ($visited->{$nei}) { # closed ring
307             #print "closing ring\n";
308 16         136 push @ring_bonds, $bond;
309             } else {
310 35         310 push @ring_bonds,
311             find_ring_bonds($mol, $opts, $nei, $visited);
312             }
313             }
314 42         291 @ring_bonds;
315             }
316              
317             1;
318              
319             =back
320              
321             =head1 BUGS
322              
323             The "all" option in find_ring doesn't quite work as expected. It finds all
324             simple rings and some bridged rings. It never finds fused rings (which is good).
325              
326             =head1 VERSION
327              
328             0.20
329              
330             =head1 SEE ALSO
331              
332             L, L.
333              
334             =head1 AUTHOR
335              
336             Ivan Tubert-Brohman Eitub@cpan.orgE
337              
338             =head1 COPYRIGHT
339              
340             Copyright (c) 2009 Ivan Tubert-Brohman. All rights reserved. This program is
341             free software; you can redistribute it and/or modify it under the same terms as
342             Perl itself.
343              
344             =cut
345