File Coverage

blib/lib/Chemistry/Bond/Find.pm

Criterion	Covered	Total	%
statement	15	254	5.9
branch	0	128	0.0
condition	0	85	0.0
subroutine	5	24	20.8
pod	2	19	10.5
total	22	510	4.3

line	stmt	bran	cond	sub	pod	time	code
1							package Chemistry::Bond::Find;
2
3							$VERSION = '0.23';
4							our $DEBUG = 0;
5							# $Id: Find.pm,v 1.9 2009/05/10 20:03:44 itubert Exp $
6
7							=head1 NAME
8
9							Chemistry::Bond::Find - Detect bonds in a molecule from atomic 3D coordinates and assign formal bond orders
10
11							=head1 SYNOPSIS
12
13							use Chemistry::Bond::Find ':all'; # export all available functions
14
15							# $mol is a Chemistry::Mol object
16							find_bonds($mol);
17							assign_bond_orders($mol);
18
19							=head1 DESCRIPTION
20
21							This module provides functions for detecting the bonds in a molecule from its
22							3D coordinates by using simple cutoffs, and for guessing the formal bond
23							orders.
24
25							This module is part of the PerlMol project, L.
26
27							=head1 FUNCTIONS
28
29							These functions may be exported, although nothing is exported by default.
30
31							=over
32
33							=cut
34
35	1			1		24871	use strict;
	1					3
	1					56
36	1			1		1033	use Chemistry::Mol;
	1					102308
	1					77
37	1			1		13	use Exporter;
	1					8
	1					28
38	1			1		6	use Carp;
	1					2
	1					511
39
40							our @ISA = qw(Exporter);
41							our @EXPORT_OK = qw(find_bonds assign_bond_orders );
42							our %EXPORT_TAGS = (all => \@EXPORT_OK);
43
44							# table taken from
45							# http://environmentalchemistry.com/yogi/periodic/covalentradius.html
46							our %Covalent_Radius = (
47							Ag => 1.34, Al => 1.18, Ar => 0.98, As => 1.20, At => 1.45, Au => 1.34,
48							B => 0.82, Ba => 1.98, Be => 0.90, Bi => 1.46, Br => 1.14, C => 0.77,
49							Ca => 1.74, Cd => 1.48, Ce => 1.65, Cl => 0.99, Co => 1.16, Cr => 1.18,
50							Cs => 2.35, Cu => 1.17, Dy => 1.59, Er => 1.57, Eu => 1.85, F => 0.72,
51							Fe => 1.17, Ga => 1.26, Gd => 1.61, Ge => 1.22, H => 0.32, He => 0.93,
52							Hf => 1.44, Hg => 1.49, Ho => 1.58, I => 1.33, In => 1.44, Ir => 1.27,
53							K => 2.03, Kr => 1.12, La => 1.69, Li => 1.23, Lu => 1.56, Mg => 1.36,
54							Mn => 1.17, Mo => 1.30, N => 0.75, Na => 1.54, Nb => 1.34, Nd => 1.64,
55							Ne => 0.71, Ni => 1.15, O => 0.73, Os => 1.26, P => 1.06, Pb => 1.47,
56							Pd => 1.28, Pm => 1.63, Po => 1.46, Pr => 1.65, Pt => 1.30, Rb => 2.16,
57							Re => 1.28, Rh => 1.25, Ru => 1.25, S => 1.02, Sb => 1.40, Sc => 1.44,
58							Se => 1.16, Si => 1.11, Sm => 1.62, Sn => 1.41, Sr => 1.91, Ta => 1.34,
59							Tb => 1.59, Tc => 1.27, Te => 1.36, Th => 1.65, Ti => 1.32, Tl => 1.48,
60							Tm => 1.56, U => 1.42, V => 1.22, W => 1.30, Xe => 1.31, Y => 1.62,
61							Yb => 1.74, Zn => 1.25, Zr => 1.45,
62							);
63
64							our $Default_Radius = 1.5; # radius for unknown elements
65
66							# I considered inlining this function, but the performance gain was minimal
67							# ( < 5 % ), so it's probably better to leave it here
68							# $opts->{cuttof} Hash Table of cutoffs.
69							# Example: key = "H C", value = [0.76, 1.42]
70							sub are_bonded {
71	0			0	0		my ($symbol_a, $symbol_b, $r, $opts) = @_;
72	0		0				return $r < ($opts->{cuttoffs}{"$symbol_a $symbol_b"} \|\|=
			0
			0
73							(($Covalent_Radius{$symbol_a} \|\| $Default_Radius)
74							+ ($Covalent_Radius{$symbol_b} \|\| $Default_Radius))
75							* $opts->{tolerance});
76							}
77
78
79							=item C
80
81							Finds and adds the bonds in a molecule. Only use it in molecules that have no
82							explicit bonds; for example, after reading a file with 3D coordinates but no
83							bond orders.
84
85							Available options:
86
87							=over
88
89							=item tolerance
90
91							Defaults to 1.1. Two atoms are considered to be bound if the distance between
92							them is less than the sum of their covalent radii multiplied by the tolerance.
93
94							=item margin
95
96							NOTE: in general setting this option is not recommended, unless you know what
97							you are doing. It is used by the space partitioning algorithm to determine the
98							"bucket size". It defaults to 2 * Rmax * tolerance, where Rmax is the largest
99							covalent radius among the elements found in the molecule. For example, if a
100							molecule has C, H, N, O, and I, Rmax = R(I) = 1.33, so the margin defaults to 2
101							* 1.33 * 1.1 = 2.926. This margin ensures that no bonds are missed by the
102							partitioning algorithm.
103
104							Using a smaller value gives faster results, but at the risk of missing some
105							bonds. In this example, if you are certain that your molecule doesn't contain
106							I-I bonds (but it has C-I bonds), you can set margin to (0.77 + 1.33) * 1.1 =
107							2.31 and you still won't miss any bonds (0.77 is the radius of carbon). This
108							only has a significant impact for molecules with a thousand atoms or more, but
109							it can reduce the execution time by 50% in some cases.
110
111							=item orders
112
113							If true, assign the bond orders after finding them, by calling
114							C.
115
116							=item bond_class
117
118							The class that will be used for creating the new bonds. The default is
119							the bond class returned by C<< $mol->bond_class >>.
120
121							=back
122
123							=cut
124
125							# The new algorithm based on a suggestion by BrowserUK
126							sub find_bonds {
127	0			0	1		my ($mol, %opts) = @_;
128	0						%opts = (min_atoms => 20, tolerance => 1.1, %opts,
129							cutoffs => {}); # set defaults
130	0						my $margin = guess_margin($mol, \%opts);
131	0						%opts = (margin => $margin, %opts);
132	0						my $grid = {};
133	0						partition_space($mol, $grid, \%opts);
134	0						find_bonds_grid($mol, $grid, \%opts);
135	0	0					if ($opts{orders}) {
136	0						assign_bond_orders($mol, %opts);
137							}
138							}
139
140	1			1		1141	use POSIX 'floor';
	1					14546
	1					8
141							my $Y = 1000;
142							my $X = $Y * $Y;
143							my $Z = 1;
144							my $ORIGIN = int(($Y3 + $Y2 + $Y)/2);
145
146							# used by the new BrowserUK algorithm
147							sub partition_space {
148	0			0	0		my ($mol, $grid, $opts) = @_;
149	0						my $margin = $opts->{margin};
150	0						for my $atom ($mol->atoms) {
151	0						my (@vec) = $atom->coords->array;
152	0						my (@norm_vec) = map { floor($_ / $margin) } @vec;
	0
153	0						my $n = $X * $norm_vec[0] + $Y * $norm_vec[1]
154							+ $norm_vec[2] + $ORIGIN;
155	0						push @{$grid->{$n}}, $atom;
	0
156							}
157							}
158
159							# used by the new BrowserUK algorithm
160							sub find_bonds_grid {
161	0			0	0		my ($mol, $grid, $opts) = @_;
162	0						while (my ($n, $atoms) = each %$grid) {
163							#print "Cell $n has ". @$atoms . " atoms\n";
164	0						find_bonds_n2_one_set($mol, $atoms, $opts);
165	0						for my $neigh_n (
166							$n+$Z,
167							$n+$Z+$Y, $n+$Z-$Y, $n+$Z+$X, $n+$Z-$X,
168							$n+$Z+$Y+$X, $n+$Z+$Y-$X, $n+$Z-$Y+$X, $n+$Z-$Y-$X,
169							$n+$Y, $n+$Y+$X, $n+$Y-$X,
170							$n+$X,
171							) {
172	0	0					if ($grid->{$neigh_n}) {
173	0						find_bonds_n2_two_sets($mol, $atoms, $grid->{$neigh_n}, $opts);
174							}
175							}
176							}
177							}
178
179							# used by both find_bonds variants to figure out the maximum cutoff
180							sub guess_margin {
181	0			0	0		my ($mol, $opts) = @_;
182	0						my $formula = $mol->formula_hash;
183	0						my $max = 0;
184	0						for my $elem (keys %$formula) {
185	0	0					$max = $Covalent_Radius{$elem} if $Covalent_Radius{$elem} > $max;
186							}
187	0						$max = 2 $opts->{tolerance};
188							#printf "MARGIN guessed at (%.2f)\n", $max;
189	0						$max;
190							}
191
192							# brute-force N^2 algorithm
193							sub find_bonds_n2_one_set {
194	0			0	0		my ($mol, $atoms, $opts) = @_;
195	0		0				my $bond_class = $opts->{bond_class} \|\| $mol->bond_class;
196	0						for (my $i = 0; $i < @$atoms; ++$i) {
197	0						for (my $j = $i + 1; $j < @$atoms; ++$j) {
198	0						my ($a1, $a2) = ($atoms->[$i], $atoms->[$j]);
199	0	0					if (are_bonded($a1->symbol, $a2->symbol, scalar $a1->distance($a2), $opts)) {
200	0						$mol->add_bond($bond_class->new(atoms=>[$a1, $a2]));
201							}
202							}
203							}
204							}
205
206							# brute force N*M algorithm for finding bonds between to sets of atoms
207							sub find_bonds_n2_two_sets {
208	0			0	0		my ($mol, $atoms1, $atoms2, $opts) = @_;
209	0		0				my $bond_class = $opts->{bond_class} \|\| $mol->bond_class;
210	0						for my $a1 (@$atoms1) {
211	0						for my $a2 (@$atoms2) {
212	0	0					if (are_bonded($a1->symbol, $a2->symbol, scalar $a1->distance($a2), $opts)) {
213	0						$mol->add_bond($bond_class->new(atoms=>[$a1, $a2]));
214							}
215							}
216							}
217							}
218
219
220							=item C
221
222							Assign the formal bond orders in a molecule. The bonds must already be defined,
223							either by C or because the molecule was read from a file that
224							included bonds but no bond orders. If the bond orders were already defined
225							(maybe the molecule came from a file that did include bond orders after all),
226							the original bond orders are erased and the process begins from scratch. Two
227							different algorithms are available, and may be selected by using the "method"
228							option:
229
230							assign_bond_orders($mol, method => 'itub');
231							assign_bond_orders($mol, method => 'baber');
232
233							=over
234
235							=item itub
236
237							This is the default if no method is specified. Developed from scratch by the
238							author of this module, this algorithm requires only the connection table
239							information, and it requires that all hydrogen atoms be explicit. It looks for
240							an atom with unsatisfied valence, increases a bond order, and then does the
241							same recursively for the neighbors. If everybody's not happy at the end, it
242							backtracks and tries another bond. The recursive part does not cover the whole
243							molecule, but only the contiguous region of "unhappy" atoms next to the
244							starting atom and their neighbors. This permits separating the molecule into
245							independent regions, so that if one is solved and there's a problem in another,
246							we don't have to backtrack to the first one.
247
248							The itub algorithm has the following additional options:
249
250							=over
251
252							=item use_coords
253
254							Although the algorithm does not I 3D coordinates, it uses them by
255							default to improve the initial guesses of which bond orders should be
256							increased. To avoid using coordinates, add the C option with a
257							false value:
258
259							assign_bond_orders($mol, use_coords => 0);
260
261							The results are the same most of the time, but using good coordinates improves
262							the results for complicated cases such as fused heteroaromatic systems.
263
264							=item scratch
265
266							If true, start the bond order assignment from scratch by assuming that all bond
267							orders are 1. If false, start from the current bond orders and try to fix the
268							unsatisfied valences. This option is true by default.
269
270							=back
271
272							=item baber
273
274							A bond order assignment algorithm based on Baber, J. C.; Hodgkin, E. E.
275							J. Chem. Inf. Comp. Sci. 1992, 32, 401-406 (with some interpretation).
276
277							This algorithm uses the 3D coordinates along with various cutoffs and
278							confidence formulas to guess the bond orders. It then tries to resolve
279							conflicts by looping through the atoms (but is not recursive or backtracking).
280							It does not require explicit hydrogens (although it's better when they are
281							available) because it was designed for use with real crystallographic data
282							which often doesn't have hydrogen atoms.
283
284							This method doesn't always give a good answer, especially for conjugated and
285							aromatic systems. The variation used in this module adds some random numbers to
286							resolve some ambiguities and break loops, so the results are not even entirely
287							deterministic (the 'itub' method is deterministic but the result may depend on
288							the input order of the atoms).
289
290							=back
291
292							=cut
293
294							sub assign_bond_orders {
295	0			0	1		my ($mol, %opts) = @_;
296	0	0	0				if ($opts{method} and $opts{method} eq 'baber') {
297	0						assign_bond_orders_baber($mol, %opts);
298							} else {
299	0						assign_bond_orders_itub($mol, %opts);
300							}
301							}
302
303							####### Bond order assignment algorithm by Ivan Tubert-Brohman
304
305							# The "typical" valence that we expect an atom to have satisfied. If not
306							# given, a value of 1 is assumed.
307							my %MIN_VALENCES = ( O => 2, C => 4, S => 2, H => 1, N => 3, P => 3, Si => 2,
308							F => 1, Cl => 1, Br => 1, I => 1 );
309
310							# $ALLOWED_INCREASES{$from}{$to} means that element $from is willing to
311							# exceed its minimum valence by having a multiple bond to element $to.
312							my %ALLOWED_INCREASES = (
313							Cl => { O => 7 },
314							Br => { O => 7 },
315							I => { O => 7 },
316							S => { O => 6, C => 3, S => 4 },
317							N => { O => 5, C => 4, N => 4 },
318							Si => { O => 4, C => 4 },
319							O => { C => 3, O => 3 },
320							P => { O => 5, C => 4 },
321							);
322
323							sub assign_bond_orders_itub {
324	0			0	0		my ($mol, %opts) = @_;
325	0						%opts = (use_coords => 1, scratch => 1, funny => {}, %opts);
326							#my @funny_atoms;
327
328	0	0					if ($opts{scratch}) {
329	0						$_->order(1) for $mol->bonds;
330							}
331	0						for my $atom ($mol->atoms) {
332	0	0					if (wants_more_bonds($atom)) {
333	0						my $ret = make_happy($atom, \%opts, []);
334	0	0					print "Atom $atom made happy? '$ret'\n" if $DEBUG;
335	0	0					unless ($ret) { # atom is funny (has formal charge or unpaired e-)
336	0						$opts{funny}{$atom} = 1;
337							#push @funny_atoms, $atom;
338							}
339							}
340							}
341							}
342
343							sub wants_more_bonds {
344	0			0	0		my ($atom) = @_;
345	0						my $valence = $atom->valence;
346							#$n_bonds += $_->order for $atom->bonds;
347
348	0		0				my $ret = ($valence < ($MIN_VALENCES{$atom->symbol} \|\| 1));
349	0	0					print " $atom wants more bonds? '$ret'\n" if $DEBUG;
350	0						$ret;
351							}
352
353							sub valid_bonds {
354	0			0	0		my ($atom) = @_;
355	0						my $valence = $atom->valence;
356							#$n_bonds += $_->order for $atom->bonds;
357
358	0		0				my $ret = ($valence <= ($MIN_VALENCES{$atom->symbol} \|\| 1));
359	0	0					print " $atom wants more bonds? '$ret'\n" if $DEBUG;
360	0						$ret;
361							}
362
363							# the heart of the algorithm; increase a bond order, and then do the same
364							# recursively for the neighbors. If everybody's not happy at the end,
365							# backtrack and try another bond. Note that this search does not cover the
366							# whole molecule, but only the "unhappy" atoms and their neighbors. This
367							# permits separating the molecule into independent regions, so that if one
368							# is solved and there's a problem in another, we don't have to backtrack
369							# to the first one.
370							sub make_happy {
371	0			0	0		my ($atom, $opts, $q) = @_;
372
373	0	0	0				if (!$opts->{funny}{$atom} and wants_more_bonds($atom)) {
374	0						push @$q, ($atom->neighbors); # the queue of atoms to be checked
375	0						for my $bn (sorted_neighbors($atom, $opts)) {
376	0						my ($nei, $bond) = @$bn{'to', 'bond'};
377							# note: it would be better to find the bond that is most likely to
378							# be increased by taking bond length into account
379	0	0					if (accepts_more_bonds($nei, $atom)) {
380	0						my $order = $bond->order;
381	0	0					print "increasing $bond($atom-$nei)\n" if $DEBUG;
382	0						$bond->order($order+1); # increase order, be happy
383							# now make sure everybody's happy
384	0						push @$q, $nei->neighbors($atom); # our friend's neighbors
385							# need to be happy too
386	0	0					if (make_happy($atom, $opts, $q)) { # are we happy now?
387	0						return 1; # everybody's happy?
388							} else {
389							# something's wrong, will have to backtrack
390	0	0					print "decreasing $bond($atom-$nei)\n" if $DEBUG;
391	0						$bond->order($order);
392	0						push @$q, $nei; # remember; neighbor's not happy either
393							}
394							}
395							}
396							# couldn't find happiness
397							} else {
398	0						my $next = shift @$q;
399	0	0					unless ($next) { # no one left; everybody is happy!
400	0	0					print "no more atoms left to check at $atom\n" if $DEBUG;
401	0						return 1;
402							}
403	0						return (make_happy($next, $opts, $q)); # happy if next atom is happy
404							}
405	0	0					print "not happy at $atom\n" if $DEBUG;
406	0						0; # not happy
407							}
408
409							sub sorted_neighbors {
410	0			0	0		my ($atom, $opts) = @_;
411	0						my $use_coords = $opts->{use_coords};
412
413	0	0					print "sorting neighbors\n" if $DEBUG;
414	0						return map {
415	0	0					$_->{bn}
		0
416							} sort {
417	0		0				($b->{wants} cmp $a->{wants}) # those who want go first
418							\|\| ($use_coords ? ($a->{len} <=> $b->{len}) : 0);
419							# if they both want to the same degree, the shortest
420							# bond goes first if we are using coords
421							} map {
422	0						+{
423							wants => wants_more_bonds($_->{to}),
424							bn => $_,
425							len => !$use_coords \|\| $_->{bond}->length,
426							}
427							} $atom->bonds_neighbors;
428							}
429
430							sub accepts_more_bonds {
431	0			0	0		my ($atom, $to) = @_;
432	0						my ($symbol, $to_symbol) = ($atom->symbol, $to->symbol);
433
434							#my $n_bonds = 0;
435							#$n_bonds += $_->order for $atom->bonds;
436	0						my $valence = $atom->valence;
437
438							# not enough bonds even for the minimum valence?
439	0	0	0				return 1 if ($valence < ($MIN_VALENCES{$atom->symbol} \|\| 1));
440
441	0	0	0				if ($valence < ($ALLOWED_INCREASES{$symbol}{$to_symbol} \|\| 0)) {
442							# make sure we are willing to make multiple bonds with this guy
443	0						return 1;
444							} else {
445	0						return 0; # max. valence satisfied
446							}
447							}
448
449
450							############
451							# Bond order assignment algorithm based on Baber, J. C.; Hodgkin, E. E.
452							# J. Chem. Inf. Comp. Sci. 1992, 32, 401-406
453
454							# this algorithm uses the 3D coordinates along with various cutoffs and
455							# confidence formulas to assign the bond orders. It does not require all
456							# explicit hydrogens (although it's better when they are available) because
457							# it's design for use with real crystallographic data which often doesn't
458							# have hydrogen.
459
460							my %Valences = (
461							C => [4], N => [3,4], O => [2],
462							P => [4, 5], S => [2, 4, 6], As => [4, 5], Se => [2, 4, 6],
463							Te => [2, 4, 6],
464							F => [1], Cl => [1, 3], Br => [1, 3, 5], I => [1, 3, 5, 7],
465							H => [1],
466							);
467
468							# note; this table should only have single-letter symbols
469							my %Bond_Orders = (
470							"C C" => { is => 1.49, id => 1.31, it => 1.18, wsd => 1.38, wdt => 1.21 },
471							"C N" => { is => 1.42, id => 1.32, it => 1.14, wsd => 1.34, wdt => 1.20 },
472							"C O" => { is => 1.41, id => 1.22, wsd => 1.28 },
473							"C S" => { is => 1.78, id => 1.68, wsd => 1.70 },
474							"N N" => { is => 1.40, id => 1.22, wsd => 1.32 },
475							"N O" => { is => 1.39, id => 1.22, wsd => 1.25 },
476							"O S" => { is => 1.58, id => 1.45, wsd => 1.54 },
477							#"O S" => { is => 1.58, id => 1.45, wsd => 1.51 }, # modified value
478							"O P" => { is => 1.60, id => 1.48, wsd => 1.52 },
479							);
480
481							# add keys for reverse bond for ease of lookup
482							for (keys %Bond_Orders) {
483							$Bond_Orders{ scalar reverse $_ } = $Bond_Orders{$_};
484							}
485
486							sub assign_bond_orders_baber {
487	0			0	0		my ($mol, %opts) = @_;
488	0		0				my $max_tries = $opts{max_tries} \|\| 10;
489
490	0						assign_initial_bonds($mol);
491	0						assign_initial_coordinations($mol);
492	0						my $tries = 0;
493	0						while (resolve_conflicts($mol)) {
494	0	0					if ($tries++ > $max_tries) {
495	0	0					print "too many tries\n" if $DEBUG;
496	0						last;
497							}
498	0	0					print "try again\n" if $DEBUG;
499							}
500	0						$tries;
501							}
502
503							sub assign_initial_bonds {
504	0			0	0		my ($mol) = @_;
505
506	0						for my $bond ($mol->bonds) {
507							#my %bond_has = map { ($_->symbol, 1 ) } $bond->atoms;
508	0						my $symbols = join " ", map { $_->symbol } $bond->atoms;
	0
509	0						my ($order, $confidence);
510	0						my $l_obs = $bond->length;
511	0	0					if ($symbols =~ /\b(H\|F\|Cl\|Br\|I)\b/) {
		0
		0
		0
512	0						$order = 1;
513	0						$confidence = 10000;
514							} elsif ($symbols =~ /\bSi\b/) {
515	0						$order = 1;
516	0						$confidence = 20 * ($l_obs - 1.4);
517							} elsif ($symbols =~ /\bB\b/) {
518	0						$order = 1;
519	0						$confidence = 20 * ($l_obs - 1.2);
520							} elsif (my $pars = $Bond_Orders{$symbols}) {
521	0	0					if ($pars->{it}) { # may have triple bonds
522	0	0	0				if ($l_obs > $pars->{wsd}) {
		0	0
		0
523	0						$order = 1;
524	0						$confidence = ($l_obs - $pars->{wsd}) /
525							(2 * ($pars->{is} - $pars->{wsd}));
526							} elsif ($pars->{id} < $l_obs and $l_obs < $pars->{wsd}) {
527	0						$order = 2;
528	0						$confidence = ($pars->{wsd} - $l_obs) /
529							(2 * ($pars->{wsd} - $pars->{id}));
530							} elsif ($pars->{id} > $l_obs and $l_obs > $pars->{wdt}) {
531	0						$order = 2;
532	0						$confidence = ($l_obs - $pars->{wdt}) /
533							(2 * ($pars->{id} - $pars->{wdt}));
534							} else {
535	0						$order = 3;
536	0						$confidence = ($pars->{wdt} - $l_obs) /
537							(2 * ($pars->{wdt} - $pars->{it}));
538							}
539							} else { # only single and double
540	0	0					if ($l_obs > $pars->{wsd}) {
541	0						$order = 1;
542	0						$confidence = ($l_obs - $pars->{wsd}) /
543							(2 * ($pars->{is} - $pars->{wsd}));
544							} else {
545	0						$order = 2;
546	0						$confidence = ($pars->{wsd} - $l_obs) /
547							(2 * ($pars->{wsd} - $pars->{id}));
548							}
549							}
550							} else { # unknown atom pair; let's assume it's always single
551	0						$order = 1;
552	0						$confidence = 100_000;
553							}
554
555	0						$confidence *= 10; # normalization constant
556	0						$bond->order($order);
557	0						$bond->attr("bond-find/confidence", $confidence);
558	0	0					print "bond $bond($symbols) len=$l_obs, order=$order, ",
559							"conf=$confidence\n"
560							if $DEBUG;
561							}
562							}
563
564							sub assign_initial_coordinations {
565	0			0	0		my ($mol) = @_;
566
567	0						for my $atom ($mol->atoms) {
568	0						my @neighbors = $atom->neighbors;
569	0						my $a_obs = 0;
570	0						my $n = 0;
571	0						my ($max_conns, $confidence);
572	0						for (my $i = 0; $i < @neighbors - 1; $i++) {
573	0						for (my $j = $i + 1; $j < @neighbors; $j++) {
574	0						my $angle = Chemistry::Atom::angle_deg($neighbors[$i],
575							$atom, $neighbors[$j]);
576							# don't count linear angles for octahedral geometries
577	0	0	0				$n++, $a_obs += $angle unless @neighbors > 2 and $angle > 150;
578							}
579							}
580	0	0					$a_obs /= $n if $n;
581
582	0	0	0				if ($n == 0) {
		0	0
		0	0
		0	0
		0
		0
		0
583							# this condition is not described explicitly in the article
584	0						$max_conns = 1; # undefined
585	0						$confidence = 0;
586	0						$n = 1;
587							} elsif ($a_obs > 150) {
588	0						$max_conns = 2; # linear
589	0						$confidence = ($a_obs - 150) / 30;
590							} elsif (120 > $a_obs and $a_obs >= 115) {
591	0						$max_conns = 3; # trigonal
592	0						$confidence = ($a_obs - 115) / 5;
593							} elsif (150 > $a_obs and $a_obs >= 120) {
594	0						$max_conns = 3; # trigonal
595	0						$confidence = (150 - $a_obs) / 30;
596							} elsif (109.5 > $a_obs and $a_obs >= 99) {
597	0						$max_conns = 4; # tetrahedral
598	0						$confidence = ($a_obs - 99) / 10.5;
599							} elsif (115 > $a_obs and $a_obs >= 109.5) {
600	0						$max_conns = 4; # tetrahedral
601	0						$confidence = (115 - $a_obs) / 5.5;
602							} elsif ($a_obs < 99) {
603	0						$max_conns = 6; # octahedral
604	0						$confidence = (99 - $a_obs) / 9;
605							} else {
606	0						confess("impossible coordination angle $a_obs!");
607							}
608
609	0						$confidence = 100 $n;
610	0	0					if ($Valences{$atom->symbol}) {
611	0						$atom->attr("bond-find/valence", $Valences{$atom->symbol}[0]);
612							} else {
613	0						$atom->attr("bond-find/valence", $max_conns);
614							}
615	0						$atom->attr("bond-find/confidence", $confidence);
616	0						$atom->attr("bond-find/max_conns", $max_conns);
617	0	0					print "Atom $atom has CN $max_conns with a conf. of $confidence\n"
618							if $DEBUG;
619							}
620							}
621
622							sub resolve_conflicts {
623	0			0	0		my ($mol) = @_;
624
625	0						my $changes = 0;
626	0						for my $atom ($mol->atoms) {
627	0						my $valence = $atom->attr('bond-find/valence');
628	0						my $max_conns = $atom->attr('bond-find/max_conns');
629	0						my $confidence = $atom->attr('bond-find/confidence');
630	0						my $n_conns = $atom->bonds;
631
632	0						my $n_bonds = 0;
633	0						$n_bonds += $_->order for $atom->bonds;
634
635							# this is a modification to avoid cummulative double bonds on
636							# non linear carbon
637	0						my $n_multiple_bonds = grep { $_->order > 1 } $atom->bonds;
	0
638
639	0	0	0				if ($n_conns > $valence) {
		0	0
		0	0
640	0						my $next_valence = next_valence($atom->symbol, $valence);
641	0	0					if ($next_valence) {
642	0	0					print "increasing valence of $atom to $next_valence\n"
643							if $DEBUG;
644	0						$atom->attr('bond-find/valence', $next_valence);
645	0						++$changes, redo;
646							} else {
647	0						warn "too many conns $n_conns to $atom with valence $valence\n";
648	0						next;
649							}
650							} elsif ($n_bonds > $valence
651							or $n_multiple_bonds > 1 and $max_conns > 2 and $atom->symbol eq 'C'
652							) {
653	0						my $next_valence = next_valence($atom->symbol, $valence);
654	0	0					if ($next_valence) {
655	0	0					print "increasing valence of $atom to $next_valence\n"
656							if $DEBUG;
657	0						$atom->attr('bond-find/valence', $next_valence);
658	0						++$changes, next;
659							}
660	0						my ($min_conf, $min_bond);
661	0						for my $bond ($atom->bonds) {
662	0	0	0				if ($bond->order > 1 and
			0
663							(not defined $min_conf
664							or $bond->attr("bond-find/confidence") < $min_conf)) {
665	0						$min_conf = $bond->attr("bond-find/confidence");
666	0						$min_bond = $bond;
667							}
668							}
669	0						my $new_order = $min_bond->order - 1;
670	0						$min_bond->order($new_order);
671	0	0					print "Decreasing order of $min_bond to $new_order\n" if $DEBUG;
672							#$min_bond->attr('bond-find/confidence', $min_conf*1.2+rand());
673	0						++$changes, next;
674							} elsif ($n_bonds + $max_conns - $n_conns < $valence) {
675	0						my ($min_conf, $min_bond);
676	0						for my $bond ($atom->bonds) {
677	0	0	0				if (($bond->order == 1
			0
			0
678							or $atom->symbol =~ /^[CN]$/ and $bond->order == 2) and
679							(not defined $min_conf
680							or $bond->attr("bond-find/confidence") < $min_conf)) {
681	0						$min_conf = $bond->attr("bond-find/confidence");
682	0						$min_bond = $bond;
683							}
684							}
685
686							#print "\t($confidence, $min_conf)\n" if ($atom->id eq 'a1');
687	0	0	0				if ($min_bond and ($confidence > 95 or $min_conf < $confidence)) {
			0
688							#print "XXX: $atom\n";
689	0						my $new_order = $min_bond->order + 1;
690	0						$min_bond->order($new_order);
691	0	0					print "Increasing order of $min_bond to $new_order\n" if $DEBUG;
692							#$min_bond->attr('bond-find/confidence', $min_conf*1.2+rand());
693							#$atom->attr('bond-find/confidence', $confidence / 1.5);
694							} else {
695	0						$max_conns++;
696	0						$atom->attr("bond-find/max_conns", $max_conns);
697	0	0					print "Increasing coord. num. of $atom to $max_conns\n"
698							if $DEBUG;
699							}
700	0						++$changes, next;
701							}
702							}
703	0						$changes;
704							}
705
706							sub next_valence {
707	0			0	0		my ($symbol, $current) = @_;
708
709	0	0					if ($Valences{$symbol}) {
710	0						for my $v (@{$Valences{$symbol}}) {
	0
711	0	0					return $v if $v > $current;
712							}
713							}
714	0						return undef;
715							}
716
717							1;
718
719							=back
720
721							=head1 VERSION
722
723							0.23
724
725							=head1 TO DO
726
727							Some future version should let the user specify the desired cutoffs, and
728							not always create a bond but call a user-supplied function instead. This way
729							these functions could be used for other purposes such as finding hydrogen bonds
730							or neighbor lists.
731
732							Add some tests.
733
734							=head1 SEE ALSO
735
736							L, L, L,
737							L.
738
739							=head1 AUTHOR
740
741							Ivan Tubert-BrohmanEitub@cpan.orgE
742
743							The new C algorithm was loosely based on a suggestion by BrowserUK
744							on perlmonks.org (L).
745
746							=head1 COPYRIGHT
747
748							Copyright (c) 2009 Ivan Tubert-Brohman. All rights reserved. This program is
749							free software; you can redistribute it and/or modify it under the same terms as
750							Perl itself.
751
752							=cut
753