| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Chemistry::Bond::Find; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
$VERSION = '0.23'; |
|
4
|
|
|
|
|
|
|
our $DEBUG = 0; |
|
5
|
|
|
|
|
|
|
# $Id: Find.pm,v 1.9 2009/05/10 20:03:44 itubert Exp $ |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 NAME |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
Chemistry::Bond::Find - Detect bonds in a molecule from atomic 3D coordinates and assign formal bond orders |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
use Chemistry::Bond::Find ':all'; # export all available functions |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# $mol is a Chemistry::Mol object |
|
16
|
|
|
|
|
|
|
find_bonds($mol); |
|
17
|
|
|
|
|
|
|
assign_bond_orders($mol); |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
This module provides functions for detecting the bonds in a molecule from its |
|
22
|
|
|
|
|
|
|
3D coordinates by using simple cutoffs, and for guessing the formal bond |
|
23
|
|
|
|
|
|
|
orders. |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
This module is part of the PerlMol project, L. |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
=head1 FUNCTIONS |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
These functions may be exported, although nothing is exported by default. |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=over |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=cut |
|
34
|
|
|
|
|
|
|
|
|
35
|
1
|
|
|
1
|
|
24871
|
use strict; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
56
|
|
|
36
|
1
|
|
|
1
|
|
1033
|
use Chemistry::Mol; |
|
|
1
|
|
|
|
|
102308
|
|
|
|
1
|
|
|
|
|
77
|
|
|
37
|
1
|
|
|
1
|
|
13
|
use Exporter; |
|
|
1
|
|
|
|
|
8
|
|
|
|
1
|
|
|
|
|
28
|
|
|
38
|
1
|
|
|
1
|
|
6
|
use Carp; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
511
|
|
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
|
41
|
|
|
|
|
|
|
our @EXPORT_OK = qw(find_bonds assign_bond_orders ); |
|
42
|
|
|
|
|
|
|
our %EXPORT_TAGS = (all => \@EXPORT_OK); |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# table taken from |
|
45
|
|
|
|
|
|
|
# http://environmentalchemistry.com/yogi/periodic/covalentradius.html |
|
46
|
|
|
|
|
|
|
our %Covalent_Radius = ( |
|
47
|
|
|
|
|
|
|
Ag => 1.34, Al => 1.18, Ar => 0.98, As => 1.20, At => 1.45, Au => 1.34, |
|
48
|
|
|
|
|
|
|
B => 0.82, Ba => 1.98, Be => 0.90, Bi => 1.46, Br => 1.14, C => 0.77, |
|
49
|
|
|
|
|
|
|
Ca => 1.74, Cd => 1.48, Ce => 1.65, Cl => 0.99, Co => 1.16, Cr => 1.18, |
|
50
|
|
|
|
|
|
|
Cs => 2.35, Cu => 1.17, Dy => 1.59, Er => 1.57, Eu => 1.85, F => 0.72, |
|
51
|
|
|
|
|
|
|
Fe => 1.17, Ga => 1.26, Gd => 1.61, Ge => 1.22, H => 0.32, He => 0.93, |
|
52
|
|
|
|
|
|
|
Hf => 1.44, Hg => 1.49, Ho => 1.58, I => 1.33, In => 1.44, Ir => 1.27, |
|
53
|
|
|
|
|
|
|
K => 2.03, Kr => 1.12, La => 1.69, Li => 1.23, Lu => 1.56, Mg => 1.36, |
|
54
|
|
|
|
|
|
|
Mn => 1.17, Mo => 1.30, N => 0.75, Na => 1.54, Nb => 1.34, Nd => 1.64, |
|
55
|
|
|
|
|
|
|
Ne => 0.71, Ni => 1.15, O => 0.73, Os => 1.26, P => 1.06, Pb => 1.47, |
|
56
|
|
|
|
|
|
|
Pd => 1.28, Pm => 1.63, Po => 1.46, Pr => 1.65, Pt => 1.30, Rb => 2.16, |
|
57
|
|
|
|
|
|
|
Re => 1.28, Rh => 1.25, Ru => 1.25, S => 1.02, Sb => 1.40, Sc => 1.44, |
|
58
|
|
|
|
|
|
|
Se => 1.16, Si => 1.11, Sm => 1.62, Sn => 1.41, Sr => 1.91, Ta => 1.34, |
|
59
|
|
|
|
|
|
|
Tb => 1.59, Tc => 1.27, Te => 1.36, Th => 1.65, Ti => 1.32, Tl => 1.48, |
|
60
|
|
|
|
|
|
|
Tm => 1.56, U => 1.42, V => 1.22, W => 1.30, Xe => 1.31, Y => 1.62, |
|
61
|
|
|
|
|
|
|
Yb => 1.74, Zn => 1.25, Zr => 1.45, |
|
62
|
|
|
|
|
|
|
); |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
our $Default_Radius = 1.5; # radius for unknown elements |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# I considered inlining this function, but the performance gain was minimal |
|
67
|
|
|
|
|
|
|
# ( < 5 % ), so it's probably better to leave it here |
|
68
|
|
|
|
|
|
|
# $opts->{cuttof} Hash Table of cutoffs. |
|
69
|
|
|
|
|
|
|
# Example: key = "H C", value = [0.76, 1.42] |
|
70
|
|
|
|
|
|
|
sub are_bonded { |
|
71
|
0
|
|
|
0
|
0
|
|
my ($symbol_a, $symbol_b, $r, $opts) = @_; |
|
72
|
0
|
|
0
|
|
|
|
return $r < ($opts->{cuttoffs}{"$symbol_a $symbol_b"} ||= |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
73
|
|
|
|
|
|
|
(($Covalent_Radius{$symbol_a} || $Default_Radius) |
|
74
|
|
|
|
|
|
|
+ ($Covalent_Radius{$symbol_b} || $Default_Radius)) |
|
75
|
|
|
|
|
|
|
* $opts->{tolerance}); |
|
76
|
|
|
|
|
|
|
} |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=item C |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
Finds and adds the bonds in a molecule. Only use it in molecules that have no |
|
82
|
|
|
|
|
|
|
explicit bonds; for example, after reading a file with 3D coordinates but no |
|
83
|
|
|
|
|
|
|
bond orders. |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
Available options: |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=over |
|
88
|
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
=item tolerance |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
Defaults to 1.1. Two atoms are considered to be bound if the distance between |
|
92
|
|
|
|
|
|
|
them is less than the sum of their covalent radii multiplied by the tolerance. |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=item margin |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
NOTE: in general setting this option is not recommended, unless you know what |
|
97
|
|
|
|
|
|
|
you are doing. It is used by the space partitioning algorithm to determine the |
|
98
|
|
|
|
|
|
|
"bucket size". It defaults to 2 * Rmax * tolerance, where Rmax is the largest |
|
99
|
|
|
|
|
|
|
covalent radius among the elements found in the molecule. For example, if a |
|
100
|
|
|
|
|
|
|
molecule has C, H, N, O, and I, Rmax = R(I) = 1.33, so the margin defaults to 2 |
|
101
|
|
|
|
|
|
|
* 1.33 * 1.1 = 2.926. This margin ensures that no bonds are missed by the |
|
102
|
|
|
|
|
|
|
partitioning algorithm. |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
Using a smaller value gives faster results, but at the risk of missing some |
|
105
|
|
|
|
|
|
|
bonds. In this example, if you are certain that your molecule doesn't contain |
|
106
|
|
|
|
|
|
|
I-I bonds (but it has C-I bonds), you can set margin to (0.77 + 1.33) * 1.1 = |
|
107
|
|
|
|
|
|
|
2.31 and you still won't miss any bonds (0.77 is the radius of carbon). This |
|
108
|
|
|
|
|
|
|
only has a significant impact for molecules with a thousand atoms or more, but |
|
109
|
|
|
|
|
|
|
it can reduce the execution time by 50% in some cases. |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=item orders |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
If true, assign the bond orders after finding them, by calling |
|
114
|
|
|
|
|
|
|
C. |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=item bond_class |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
The class that will be used for creating the new bonds. The default is |
|
119
|
|
|
|
|
|
|
the bond class returned by C<< $mol->bond_class >>. |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=back |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=cut |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# The new algorithm based on a suggestion by BrowserUK |
|
126
|
|
|
|
|
|
|
sub find_bonds { |
|
127
|
0
|
|
|
0
|
1
|
|
my ($mol, %opts) = @_; |
|
128
|
0
|
|
|
|
|
|
%opts = (min_atoms => 20, tolerance => 1.1, %opts, |
|
129
|
|
|
|
|
|
|
cutoffs => {}); # set defaults |
|
130
|
0
|
|
|
|
|
|
my $margin = guess_margin($mol, \%opts); |
|
131
|
0
|
|
|
|
|
|
%opts = (margin => $margin, %opts); |
|
132
|
0
|
|
|
|
|
|
my $grid = {}; |
|
133
|
0
|
|
|
|
|
|
partition_space($mol, $grid, \%opts); |
|
134
|
0
|
|
|
|
|
|
find_bonds_grid($mol, $grid, \%opts); |
|
135
|
0
|
0
|
|
|
|
|
if ($opts{orders}) { |
|
136
|
0
|
|
|
|
|
|
assign_bond_orders($mol, %opts); |
|
137
|
|
|
|
|
|
|
} |
|
138
|
|
|
|
|
|
|
} |
|
139
|
|
|
|
|
|
|
|
|
140
|
1
|
|
|
1
|
|
1141
|
use POSIX 'floor'; |
|
|
1
|
|
|
|
|
14546
|
|
|
|
1
|
|
|
|
|
8
|
|
|
141
|
|
|
|
|
|
|
my $Y = 1000; |
|
142
|
|
|
|
|
|
|
my $X = $Y * $Y; |
|
143
|
|
|
|
|
|
|
my $Z = 1; |
|
144
|
|
|
|
|
|
|
my $ORIGIN = int(($Y**3 + $Y**2 + $Y)/2); |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# used by the new BrowserUK algorithm |
|
147
|
|
|
|
|
|
|
sub partition_space { |
|
148
|
0
|
|
|
0
|
0
|
|
my ($mol, $grid, $opts) = @_; |
|
149
|
0
|
|
|
|
|
|
my $margin = $opts->{margin}; |
|
150
|
0
|
|
|
|
|
|
for my $atom ($mol->atoms) { |
|
151
|
0
|
|
|
|
|
|
my (@vec) = $atom->coords->array; |
|
152
|
0
|
|
|
|
|
|
my (@norm_vec) = map { floor($_ / $margin) } @vec; |
|
|
0
|
|
|
|
|
|
|
|
153
|
0
|
|
|
|
|
|
my $n = $X * $norm_vec[0] + $Y * $norm_vec[1] |
|
154
|
|
|
|
|
|
|
+ $norm_vec[2] + $ORIGIN; |
|
155
|
0
|
|
|
|
|
|
push @{$grid->{$n}}, $atom; |
|
|
0
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
} |
|
157
|
|
|
|
|
|
|
} |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
# used by the new BrowserUK algorithm |
|
160
|
|
|
|
|
|
|
sub find_bonds_grid { |
|
161
|
0
|
|
|
0
|
0
|
|
my ($mol, $grid, $opts) = @_; |
|
162
|
0
|
|
|
|
|
|
while (my ($n, $atoms) = each %$grid) { |
|
163
|
|
|
|
|
|
|
#print "Cell $n has ". @$atoms . " atoms\n"; |
|
164
|
0
|
|
|
|
|
|
find_bonds_n2_one_set($mol, $atoms, $opts); |
|
165
|
0
|
|
|
|
|
|
for my $neigh_n ( |
|
166
|
|
|
|
|
|
|
$n+$Z, |
|
167
|
|
|
|
|
|
|
$n+$Z+$Y, $n+$Z-$Y, $n+$Z+$X, $n+$Z-$X, |
|
168
|
|
|
|
|
|
|
$n+$Z+$Y+$X, $n+$Z+$Y-$X, $n+$Z-$Y+$X, $n+$Z-$Y-$X, |
|
169
|
|
|
|
|
|
|
$n+$Y, $n+$Y+$X, $n+$Y-$X, |
|
170
|
|
|
|
|
|
|
$n+$X, |
|
171
|
|
|
|
|
|
|
) { |
|
172
|
0
|
0
|
|
|
|
|
if ($grid->{$neigh_n}) { |
|
173
|
0
|
|
|
|
|
|
find_bonds_n2_two_sets($mol, $atoms, $grid->{$neigh_n}, $opts); |
|
174
|
|
|
|
|
|
|
} |
|
175
|
|
|
|
|
|
|
} |
|
176
|
|
|
|
|
|
|
} |
|
177
|
|
|
|
|
|
|
} |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
# used by both find_bonds variants to figure out the maximum cutoff |
|
180
|
|
|
|
|
|
|
sub guess_margin { |
|
181
|
0
|
|
|
0
|
0
|
|
my ($mol, $opts) = @_; |
|
182
|
0
|
|
|
|
|
|
my $formula = $mol->formula_hash; |
|
183
|
0
|
|
|
|
|
|
my $max = 0; |
|
184
|
0
|
|
|
|
|
|
for my $elem (keys %$formula) { |
|
185
|
0
|
0
|
|
|
|
|
$max = $Covalent_Radius{$elem} if $Covalent_Radius{$elem} > $max; |
|
186
|
|
|
|
|
|
|
} |
|
187
|
0
|
|
|
|
|
|
$max *= 2 * $opts->{tolerance}; |
|
188
|
|
|
|
|
|
|
#printf "MARGIN guessed at (%.2f)\n", $max; |
|
189
|
0
|
|
|
|
|
|
$max; |
|
190
|
|
|
|
|
|
|
} |
|
191
|
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
# brute-force N^2 algorithm |
|
193
|
|
|
|
|
|
|
sub find_bonds_n2_one_set { |
|
194
|
0
|
|
|
0
|
0
|
|
my ($mol, $atoms, $opts) = @_; |
|
195
|
0
|
|
0
|
|
|
|
my $bond_class = $opts->{bond_class} || $mol->bond_class; |
|
196
|
0
|
|
|
|
|
|
for (my $i = 0; $i < @$atoms; ++$i) { |
|
197
|
0
|
|
|
|
|
|
for (my $j = $i + 1; $j < @$atoms; ++$j) { |
|
198
|
0
|
|
|
|
|
|
my ($a1, $a2) = ($atoms->[$i], $atoms->[$j]); |
|
199
|
0
|
0
|
|
|
|
|
if (are_bonded($a1->symbol, $a2->symbol, scalar $a1->distance($a2), $opts)) { |
|
200
|
0
|
|
|
|
|
|
$mol->add_bond($bond_class->new(atoms=>[$a1, $a2])); |
|
201
|
|
|
|
|
|
|
} |
|
202
|
|
|
|
|
|
|
} |
|
203
|
|
|
|
|
|
|
} |
|
204
|
|
|
|
|
|
|
} |
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
# brute force N*M algorithm for finding bonds between to sets of atoms |
|
207
|
|
|
|
|
|
|
sub find_bonds_n2_two_sets { |
|
208
|
0
|
|
|
0
|
0
|
|
my ($mol, $atoms1, $atoms2, $opts) = @_; |
|
209
|
0
|
|
0
|
|
|
|
my $bond_class = $opts->{bond_class} || $mol->bond_class; |
|
210
|
0
|
|
|
|
|
|
for my $a1 (@$atoms1) { |
|
211
|
0
|
|
|
|
|
|
for my $a2 (@$atoms2) { |
|
212
|
0
|
0
|
|
|
|
|
if (are_bonded($a1->symbol, $a2->symbol, scalar $a1->distance($a2), $opts)) { |
|
213
|
0
|
|
|
|
|
|
$mol->add_bond($bond_class->new(atoms=>[$a1, $a2])); |
|
214
|
|
|
|
|
|
|
} |
|
215
|
|
|
|
|
|
|
} |
|
216
|
|
|
|
|
|
|
} |
|
217
|
|
|
|
|
|
|
} |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
=item C |
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
Assign the formal bond orders in a molecule. The bonds must already be defined, |
|
223
|
|
|
|
|
|
|
either by C or because the molecule was read from a file that |
|
224
|
|
|
|
|
|
|
included bonds but no bond orders. If the bond orders were already defined |
|
225
|
|
|
|
|
|
|
(maybe the molecule came from a file that did include bond orders after all), |
|
226
|
|
|
|
|
|
|
the original bond orders are erased and the process begins from scratch. Two |
|
227
|
|
|
|
|
|
|
different algorithms are available, and may be selected by using the "method" |
|
228
|
|
|
|
|
|
|
option: |
|
229
|
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
assign_bond_orders($mol, method => 'itub'); |
|
231
|
|
|
|
|
|
|
assign_bond_orders($mol, method => 'baber'); |
|
232
|
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=over |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
=item itub |
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
This is the default if no method is specified. Developed from scratch by the |
|
238
|
|
|
|
|
|
|
author of this module, this algorithm requires only the connection table |
|
239
|
|
|
|
|
|
|
information, and it requires that all hydrogen atoms be explicit. It looks for |
|
240
|
|
|
|
|
|
|
an atom with unsatisfied valence, increases a bond order, and then does the |
|
241
|
|
|
|
|
|
|
same recursively for the neighbors. If everybody's not happy at the end, it |
|
242
|
|
|
|
|
|
|
backtracks and tries another bond. The recursive part does not cover the whole |
|
243
|
|
|
|
|
|
|
molecule, but only the contiguous region of "unhappy" atoms next to the |
|
244
|
|
|
|
|
|
|
starting atom and their neighbors. This permits separating the molecule into |
|
245
|
|
|
|
|
|
|
independent regions, so that if one is solved and there's a problem in another, |
|
246
|
|
|
|
|
|
|
we don't have to backtrack to the first one. |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
The itub algorithm has the following additional options: |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
=over |
|
251
|
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=item use_coords |
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
Although the algorithm does not I 3D coordinates, it uses them by |
|
255
|
|
|
|
|
|
|
default to improve the initial guesses of which bond orders should be |
|
256
|
|
|
|
|
|
|
increased. To avoid using coordinates, add the C option with a |
|
257
|
|
|
|
|
|
|
false value: |
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
assign_bond_orders($mol, use_coords => 0); |
|
260
|
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
The results are the same most of the time, but using good coordinates improves |
|
262
|
|
|
|
|
|
|
the results for complicated cases such as fused heteroaromatic systems. |
|
263
|
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
=item scratch |
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
If true, start the bond order assignment from scratch by assuming that all bond |
|
267
|
|
|
|
|
|
|
orders are 1. If false, start from the current bond orders and try to fix the |
|
268
|
|
|
|
|
|
|
unsatisfied valences. This option is true by default. |
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=back |
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=item baber |
|
273
|
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
A bond order assignment algorithm based on Baber, J. C.; Hodgkin, E. E. |
|
275
|
|
|
|
|
|
|
J. Chem. Inf. Comp. Sci. 1992, 32, 401-406 (with some interpretation). |
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
This algorithm uses the 3D coordinates along with various cutoffs and |
|
278
|
|
|
|
|
|
|
confidence formulas to guess the bond orders. It then tries to resolve |
|
279
|
|
|
|
|
|
|
conflicts by looping through the atoms (but is not recursive or backtracking). |
|
280
|
|
|
|
|
|
|
It does not require explicit hydrogens (although it's better when they are |
|
281
|
|
|
|
|
|
|
available) because it was designed for use with real crystallographic data |
|
282
|
|
|
|
|
|
|
which often doesn't have hydrogen atoms. |
|
283
|
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
This method doesn't always give a good answer, especially for conjugated and |
|
285
|
|
|
|
|
|
|
aromatic systems. The variation used in this module adds some random numbers to |
|
286
|
|
|
|
|
|
|
resolve some ambiguities and break loops, so the results are not even entirely |
|
287
|
|
|
|
|
|
|
deterministic (the 'itub' method is deterministic but the result may depend on |
|
288
|
|
|
|
|
|
|
the input order of the atoms). |
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=back |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
=cut |
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
sub assign_bond_orders { |
|
295
|
0
|
|
|
0
|
1
|
|
my ($mol, %opts) = @_; |
|
296
|
0
|
0
|
0
|
|
|
|
if ($opts{method} and $opts{method} eq 'baber') { |
|
297
|
0
|
|
|
|
|
|
assign_bond_orders_baber($mol, %opts); |
|
298
|
|
|
|
|
|
|
} else { |
|
299
|
0
|
|
|
|
|
|
assign_bond_orders_itub($mol, %opts); |
|
300
|
|
|
|
|
|
|
} |
|
301
|
|
|
|
|
|
|
} |
|
302
|
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
####### Bond order assignment algorithm by Ivan Tubert-Brohman |
|
304
|
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
# The "typical" valence that we expect an atom to have satisfied. If not |
|
306
|
|
|
|
|
|
|
# given, a value of 1 is assumed. |
|
307
|
|
|
|
|
|
|
my %MIN_VALENCES = ( O => 2, C => 4, S => 2, H => 1, N => 3, P => 3, Si => 2, |
|
308
|
|
|
|
|
|
|
F => 1, Cl => 1, Br => 1, I => 1 ); |
|
309
|
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
# $ALLOWED_INCREASES{$from}{$to} means that element $from is willing to |
|
311
|
|
|
|
|
|
|
# exceed its minimum valence by having a multiple bond to element $to. |
|
312
|
|
|
|
|
|
|
my %ALLOWED_INCREASES = ( |
|
313
|
|
|
|
|
|
|
Cl => { O => 7 }, |
|
314
|
|
|
|
|
|
|
Br => { O => 7 }, |
|
315
|
|
|
|
|
|
|
I => { O => 7 }, |
|
316
|
|
|
|
|
|
|
S => { O => 6, C => 3, S => 4 }, |
|
317
|
|
|
|
|
|
|
N => { O => 5, C => 4, N => 4 }, |
|
318
|
|
|
|
|
|
|
Si => { O => 4, C => 4 }, |
|
319
|
|
|
|
|
|
|
O => { C => 3, O => 3 }, |
|
320
|
|
|
|
|
|
|
P => { O => 5, C => 4 }, |
|
321
|
|
|
|
|
|
|
); |
|
322
|
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
sub assign_bond_orders_itub { |
|
324
|
0
|
|
|
0
|
0
|
|
my ($mol, %opts) = @_; |
|
325
|
0
|
|
|
|
|
|
%opts = (use_coords => 1, scratch => 1, funny => {}, %opts); |
|
326
|
|
|
|
|
|
|
#my @funny_atoms; |
|
327
|
|
|
|
|
|
|
|
|
328
|
0
|
0
|
|
|
|
|
if ($opts{scratch}) { |
|
329
|
0
|
|
|
|
|
|
$_->order(1) for $mol->bonds; |
|
330
|
|
|
|
|
|
|
} |
|
331
|
0
|
|
|
|
|
|
for my $atom ($mol->atoms) { |
|
332
|
0
|
0
|
|
|
|
|
if (wants_more_bonds($atom)) { |
|
333
|
0
|
|
|
|
|
|
my $ret = make_happy($atom, \%opts, []); |
|
334
|
0
|
0
|
|
|
|
|
print "Atom $atom made happy? '$ret'\n" if $DEBUG; |
|
335
|
0
|
0
|
|
|
|
|
unless ($ret) { # atom is funny (has formal charge or unpaired e-) |
|
336
|
0
|
|
|
|
|
|
$opts{funny}{$atom} = 1; |
|
337
|
|
|
|
|
|
|
#push @funny_atoms, $atom; |
|
338
|
|
|
|
|
|
|
} |
|
339
|
|
|
|
|
|
|
} |
|
340
|
|
|
|
|
|
|
} |
|
341
|
|
|
|
|
|
|
} |
|
342
|
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
sub wants_more_bonds { |
|
344
|
0
|
|
|
0
|
0
|
|
my ($atom) = @_; |
|
345
|
0
|
|
|
|
|
|
my $valence = $atom->valence; |
|
346
|
|
|
|
|
|
|
#$n_bonds += $_->order for $atom->bonds; |
|
347
|
|
|
|
|
|
|
|
|
348
|
0
|
|
0
|
|
|
|
my $ret = ($valence < ($MIN_VALENCES{$atom->symbol} || 1)); |
|
349
|
0
|
0
|
|
|
|
|
print " $atom wants more bonds? '$ret'\n" if $DEBUG; |
|
350
|
0
|
|
|
|
|
|
$ret; |
|
351
|
|
|
|
|
|
|
} |
|
352
|
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
sub valid_bonds { |
|
354
|
0
|
|
|
0
|
0
|
|
my ($atom) = @_; |
|
355
|
0
|
|
|
|
|
|
my $valence = $atom->valence; |
|
356
|
|
|
|
|
|
|
#$n_bonds += $_->order for $atom->bonds; |
|
357
|
|
|
|
|
|
|
|
|
358
|
0
|
|
0
|
|
|
|
my $ret = ($valence <= ($MIN_VALENCES{$atom->symbol} || 1)); |
|
359
|
0
|
0
|
|
|
|
|
print " $atom wants more bonds? '$ret'\n" if $DEBUG; |
|
360
|
0
|
|
|
|
|
|
$ret; |
|
361
|
|
|
|
|
|
|
} |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
# the heart of the algorithm; increase a bond order, and then do the same |
|
364
|
|
|
|
|
|
|
# recursively for the neighbors. If everybody's not happy at the end, |
|
365
|
|
|
|
|
|
|
# backtrack and try another bond. Note that this search does not cover the |
|
366
|
|
|
|
|
|
|
# whole molecule, but only the "unhappy" atoms and their neighbors. This |
|
367
|
|
|
|
|
|
|
# permits separating the molecule into independent regions, so that if one |
|
368
|
|
|
|
|
|
|
# is solved and there's a problem in another, we don't have to backtrack |
|
369
|
|
|
|
|
|
|
# to the first one. |
|
370
|
|
|
|
|
|
|
sub make_happy { |
|
371
|
0
|
|
|
0
|
0
|
|
my ($atom, $opts, $q) = @_; |
|
372
|
|
|
|
|
|
|
|
|
373
|
0
|
0
|
0
|
|
|
|
if (!$opts->{funny}{$atom} and wants_more_bonds($atom)) { |
|
374
|
0
|
|
|
|
|
|
push @$q, ($atom->neighbors); # the queue of atoms to be checked |
|
375
|
0
|
|
|
|
|
|
for my $bn (sorted_neighbors($atom, $opts)) { |
|
376
|
0
|
|
|
|
|
|
my ($nei, $bond) = @$bn{'to', 'bond'}; |
|
377
|
|
|
|
|
|
|
# note: it would be better to find the bond that is most likely to |
|
378
|
|
|
|
|
|
|
# be increased by taking bond length into account |
|
379
|
0
|
0
|
|
|
|
|
if (accepts_more_bonds($nei, $atom)) { |
|
380
|
0
|
|
|
|
|
|
my $order = $bond->order; |
|
381
|
0
|
0
|
|
|
|
|
print "increasing $bond($atom-$nei)\n" if $DEBUG; |
|
382
|
0
|
|
|
|
|
|
$bond->order($order+1); # increase order, be happy |
|
383
|
|
|
|
|
|
|
# now make sure everybody's happy |
|
384
|
0
|
|
|
|
|
|
push @$q, $nei->neighbors($atom); # our friend's neighbors |
|
385
|
|
|
|
|
|
|
# need to be happy too |
|
386
|
0
|
0
|
|
|
|
|
if (make_happy($atom, $opts, $q)) { # are we happy now? |
|
387
|
0
|
|
|
|
|
|
return 1; # everybody's happy? |
|
388
|
|
|
|
|
|
|
} else { |
|
389
|
|
|
|
|
|
|
# something's wrong, will have to backtrack |
|
390
|
0
|
0
|
|
|
|
|
print "decreasing $bond($atom-$nei)\n" if $DEBUG; |
|
391
|
0
|
|
|
|
|
|
$bond->order($order); |
|
392
|
0
|
|
|
|
|
|
push @$q, $nei; # remember; neighbor's not happy either |
|
393
|
|
|
|
|
|
|
} |
|
394
|
|
|
|
|
|
|
} |
|
395
|
|
|
|
|
|
|
} |
|
396
|
|
|
|
|
|
|
# couldn't find happiness |
|
397
|
|
|
|
|
|
|
} else { |
|
398
|
0
|
|
|
|
|
|
my $next = shift @$q; |
|
399
|
0
|
0
|
|
|
|
|
unless ($next) { # no one left; everybody is happy! |
|
400
|
0
|
0
|
|
|
|
|
print "no more atoms left to check at $atom\n" if $DEBUG; |
|
401
|
0
|
|
|
|
|
|
return 1; |
|
402
|
|
|
|
|
|
|
} |
|
403
|
0
|
|
|
|
|
|
return (make_happy($next, $opts, $q)); # happy if next atom is happy |
|
404
|
|
|
|
|
|
|
} |
|
405
|
0
|
0
|
|
|
|
|
print "not happy at $atom\n" if $DEBUG; |
|
406
|
0
|
|
|
|
|
|
0; # not happy |
|
407
|
|
|
|
|
|
|
} |
|
408
|
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
sub sorted_neighbors { |
|
410
|
0
|
|
|
0
|
0
|
|
my ($atom, $opts) = @_; |
|
411
|
0
|
|
|
|
|
|
my $use_coords = $opts->{use_coords}; |
|
412
|
|
|
|
|
|
|
|
|
413
|
0
|
0
|
|
|
|
|
print "sorting neighbors\n" if $DEBUG; |
|
414
|
0
|
|
|
|
|
|
return map { |
|
415
|
0
|
0
|
|
|
|
|
$_->{bn} |
|
|
|
0
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
} sort { |
|
417
|
0
|
|
0
|
|
|
|
($b->{wants} cmp $a->{wants}) # those who want go first |
|
418
|
|
|
|
|
|
|
|| ($use_coords ? ($a->{len} <=> $b->{len}) : 0); |
|
419
|
|
|
|
|
|
|
# if they both want to the same degree, the shortest |
|
420
|
|
|
|
|
|
|
# bond goes first if we are using coords |
|
421
|
|
|
|
|
|
|
} map { |
|
422
|
0
|
|
|
|
|
|
+{ |
|
423
|
|
|
|
|
|
|
wants => wants_more_bonds($_->{to}), |
|
424
|
|
|
|
|
|
|
bn => $_, |
|
425
|
|
|
|
|
|
|
len => !$use_coords || $_->{bond}->length, |
|
426
|
|
|
|
|
|
|
} |
|
427
|
|
|
|
|
|
|
} $atom->bonds_neighbors; |
|
428
|
|
|
|
|
|
|
} |
|
429
|
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
sub accepts_more_bonds { |
|
431
|
0
|
|
|
0
|
0
|
|
my ($atom, $to) = @_; |
|
432
|
0
|
|
|
|
|
|
my ($symbol, $to_symbol) = ($atom->symbol, $to->symbol); |
|
433
|
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
#my $n_bonds = 0; |
|
435
|
|
|
|
|
|
|
#$n_bonds += $_->order for $atom->bonds; |
|
436
|
0
|
|
|
|
|
|
my $valence = $atom->valence; |
|
437
|
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
# not enough bonds even for the minimum valence? |
|
439
|
0
|
0
|
0
|
|
|
|
return 1 if ($valence < ($MIN_VALENCES{$atom->symbol} || 1)); |
|
440
|
|
|
|
|
|
|
|
|
441
|
0
|
0
|
0
|
|
|
|
if ($valence < ($ALLOWED_INCREASES{$symbol}{$to_symbol} || 0)) { |
|
442
|
|
|
|
|
|
|
# make sure we are willing to make multiple bonds with this guy |
|
443
|
0
|
|
|
|
|
|
return 1; |
|
444
|
|
|
|
|
|
|
} else { |
|
445
|
0
|
|
|
|
|
|
return 0; # max. valence satisfied |
|
446
|
|
|
|
|
|
|
} |
|
447
|
|
|
|
|
|
|
} |
|
448
|
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
############ |
|
451
|
|
|
|
|
|
|
# Bond order assignment algorithm based on Baber, J. C.; Hodgkin, E. E. |
|
452
|
|
|
|
|
|
|
# J. Chem. Inf. Comp. Sci. 1992, 32, 401-406 |
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
# this algorithm uses the 3D coordinates along with various cutoffs and |
|
455
|
|
|
|
|
|
|
# confidence formulas to assign the bond orders. It does not require all |
|
456
|
|
|
|
|
|
|
# explicit hydrogens (although it's better when they are available) because |
|
457
|
|
|
|
|
|
|
# it's design for use with real crystallographic data which often doesn't |
|
458
|
|
|
|
|
|
|
# have hydrogen. |
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
my %Valences = ( |
|
461
|
|
|
|
|
|
|
C => [4], N => [3,4], O => [2], |
|
462
|
|
|
|
|
|
|
P => [4, 5], S => [2, 4, 6], As => [4, 5], Se => [2, 4, 6], |
|
463
|
|
|
|
|
|
|
Te => [2, 4, 6], |
|
464
|
|
|
|
|
|
|
F => [1], Cl => [1, 3], Br => [1, 3, 5], I => [1, 3, 5, 7], |
|
465
|
|
|
|
|
|
|
H => [1], |
|
466
|
|
|
|
|
|
|
); |
|
467
|
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
# note; this table should only have single-letter symbols |
|
469
|
|
|
|
|
|
|
my %Bond_Orders = ( |
|
470
|
|
|
|
|
|
|
"C C" => { is => 1.49, id => 1.31, it => 1.18, wsd => 1.38, wdt => 1.21 }, |
|
471
|
|
|
|
|
|
|
"C N" => { is => 1.42, id => 1.32, it => 1.14, wsd => 1.34, wdt => 1.20 }, |
|
472
|
|
|
|
|
|
|
"C O" => { is => 1.41, id => 1.22, wsd => 1.28 }, |
|
473
|
|
|
|
|
|
|
"C S" => { is => 1.78, id => 1.68, wsd => 1.70 }, |
|
474
|
|
|
|
|
|
|
"N N" => { is => 1.40, id => 1.22, wsd => 1.32 }, |
|
475
|
|
|
|
|
|
|
"N O" => { is => 1.39, id => 1.22, wsd => 1.25 }, |
|
476
|
|
|
|
|
|
|
"O S" => { is => 1.58, id => 1.45, wsd => 1.54 }, |
|
477
|
|
|
|
|
|
|
#"O S" => { is => 1.58, id => 1.45, wsd => 1.51 }, # modified value |
|
478
|
|
|
|
|
|
|
"O P" => { is => 1.60, id => 1.48, wsd => 1.52 }, |
|
479
|
|
|
|
|
|
|
); |
|
480
|
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
# add keys for reverse bond for ease of lookup |
|
482
|
|
|
|
|
|
|
for (keys %Bond_Orders) { |
|
483
|
|
|
|
|
|
|
$Bond_Orders{ scalar reverse $_ } = $Bond_Orders{$_}; |
|
484
|
|
|
|
|
|
|
} |
|
485
|
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
sub assign_bond_orders_baber { |
|
487
|
0
|
|
|
0
|
0
|
|
my ($mol, %opts) = @_; |
|
488
|
0
|
|
0
|
|
|
|
my $max_tries = $opts{max_tries} || 10; |
|
489
|
|
|
|
|
|
|
|
|
490
|
0
|
|
|
|
|
|
assign_initial_bonds($mol); |
|
491
|
0
|
|
|
|
|
|
assign_initial_coordinations($mol); |
|
492
|
0
|
|
|
|
|
|
my $tries = 0; |
|
493
|
0
|
|
|
|
|
|
while (resolve_conflicts($mol)) { |
|
494
|
0
|
0
|
|
|
|
|
if ($tries++ > $max_tries) { |
|
495
|
0
|
0
|
|
|
|
|
print "too many tries\n" if $DEBUG; |
|
496
|
0
|
|
|
|
|
|
last; |
|
497
|
|
|
|
|
|
|
} |
|
498
|
0
|
0
|
|
|
|
|
print "try again\n" if $DEBUG; |
|
499
|
|
|
|
|
|
|
} |
|
500
|
0
|
|
|
|
|
|
$tries; |
|
501
|
|
|
|
|
|
|
} |
|
502
|
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
sub assign_initial_bonds { |
|
504
|
0
|
|
|
0
|
0
|
|
my ($mol) = @_; |
|
505
|
|
|
|
|
|
|
|
|
506
|
0
|
|
|
|
|
|
for my $bond ($mol->bonds) { |
|
507
|
|
|
|
|
|
|
#my %bond_has = map { ($_->symbol, 1 ) } $bond->atoms; |
|
508
|
0
|
|
|
|
|
|
my $symbols = join " ", map { $_->symbol } $bond->atoms; |
|
|
0
|
|
|
|
|
|
|
|
509
|
0
|
|
|
|
|
|
my ($order, $confidence); |
|
510
|
0
|
|
|
|
|
|
my $l_obs = $bond->length; |
|
511
|
0
|
0
|
|
|
|
|
if ($symbols =~ /\b(H|F|Cl|Br|I)\b/) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
512
|
0
|
|
|
|
|
|
$order = 1; |
|
513
|
0
|
|
|
|
|
|
$confidence = 10000; |
|
514
|
|
|
|
|
|
|
} elsif ($symbols =~ /\bSi\b/) { |
|
515
|
0
|
|
|
|
|
|
$order = 1; |
|
516
|
0
|
|
|
|
|
|
$confidence = 20 * ($l_obs - 1.4); |
|
517
|
|
|
|
|
|
|
} elsif ($symbols =~ /\bB\b/) { |
|
518
|
0
|
|
|
|
|
|
$order = 1; |
|
519
|
0
|
|
|
|
|
|
$confidence = 20 * ($l_obs - 1.2); |
|
520
|
|
|
|
|
|
|
} elsif (my $pars = $Bond_Orders{$symbols}) { |
|
521
|
0
|
0
|
|
|
|
|
if ($pars->{it}) { # may have triple bonds |
|
522
|
0
|
0
|
0
|
|
|
|
if ($l_obs > $pars->{wsd}) { |
|
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
523
|
0
|
|
|
|
|
|
$order = 1; |
|
524
|
0
|
|
|
|
|
|
$confidence = ($l_obs - $pars->{wsd}) / |
|
525
|
|
|
|
|
|
|
(2 * ($pars->{is} - $pars->{wsd})); |
|
526
|
|
|
|
|
|
|
} elsif ($pars->{id} < $l_obs and $l_obs < $pars->{wsd}) { |
|
527
|
0
|
|
|
|
|
|
$order = 2; |
|
528
|
0
|
|
|
|
|
|
$confidence = ($pars->{wsd} - $l_obs) / |
|
529
|
|
|
|
|
|
|
(2 * ($pars->{wsd} - $pars->{id})); |
|
530
|
|
|
|
|
|
|
} elsif ($pars->{id} > $l_obs and $l_obs > $pars->{wdt}) { |
|
531
|
0
|
|
|
|
|
|
$order = 2; |
|
532
|
0
|
|
|
|
|
|
$confidence = ($l_obs - $pars->{wdt}) / |
|
533
|
|
|
|
|
|
|
(2 * ($pars->{id} - $pars->{wdt})); |
|
534
|
|
|
|
|
|
|
} else { |
|
535
|
0
|
|
|
|
|
|
$order = 3; |
|
536
|
0
|
|
|
|
|
|
$confidence = ($pars->{wdt} - $l_obs) / |
|
537
|
|
|
|
|
|
|
(2 * ($pars->{wdt} - $pars->{it})); |
|
538
|
|
|
|
|
|
|
} |
|
539
|
|
|
|
|
|
|
} else { # only single and double |
|
540
|
0
|
0
|
|
|
|
|
if ($l_obs > $pars->{wsd}) { |
|
541
|
0
|
|
|
|
|
|
$order = 1; |
|
542
|
0
|
|
|
|
|
|
$confidence = ($l_obs - $pars->{wsd}) / |
|
543
|
|
|
|
|
|
|
(2 * ($pars->{is} - $pars->{wsd})); |
|
544
|
|
|
|
|
|
|
} else { |
|
545
|
0
|
|
|
|
|
|
$order = 2; |
|
546
|
0
|
|
|
|
|
|
$confidence = ($pars->{wsd} - $l_obs) / |
|
547
|
|
|
|
|
|
|
(2 * ($pars->{wsd} - $pars->{id})); |
|
548
|
|
|
|
|
|
|
} |
|
549
|
|
|
|
|
|
|
} |
|
550
|
|
|
|
|
|
|
} else { # unknown atom pair; let's assume it's always single |
|
551
|
0
|
|
|
|
|
|
$order = 1; |
|
552
|
0
|
|
|
|
|
|
$confidence = 100_000; |
|
553
|
|
|
|
|
|
|
} |
|
554
|
|
|
|
|
|
|
|
|
555
|
0
|
|
|
|
|
|
$confidence *= 10; # normalization constant |
|
556
|
0
|
|
|
|
|
|
$bond->order($order); |
|
557
|
0
|
|
|
|
|
|
$bond->attr("bond-find/confidence", $confidence); |
|
558
|
0
|
0
|
|
|
|
|
print "bond $bond($symbols) len=$l_obs, order=$order, ", |
|
559
|
|
|
|
|
|
|
"conf=$confidence\n" |
|
560
|
|
|
|
|
|
|
if $DEBUG; |
|
561
|
|
|
|
|
|
|
} |
|
562
|
|
|
|
|
|
|
} |
|
563
|
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
sub assign_initial_coordinations { |
|
565
|
0
|
|
|
0
|
0
|
|
my ($mol) = @_; |
|
566
|
|
|
|
|
|
|
|
|
567
|
0
|
|
|
|
|
|
for my $atom ($mol->atoms) { |
|
568
|
0
|
|
|
|
|
|
my @neighbors = $atom->neighbors; |
|
569
|
0
|
|
|
|
|
|
my $a_obs = 0; |
|
570
|
0
|
|
|
|
|
|
my $n = 0; |
|
571
|
0
|
|
|
|
|
|
my ($max_conns, $confidence); |
|
572
|
0
|
|
|
|
|
|
for (my $i = 0; $i < @neighbors - 1; $i++) { |
|
573
|
0
|
|
|
|
|
|
for (my $j = $i + 1; $j < @neighbors; $j++) { |
|
574
|
0
|
|
|
|
|
|
my $angle = Chemistry::Atom::angle_deg($neighbors[$i], |
|
575
|
|
|
|
|
|
|
$atom, $neighbors[$j]); |
|
576
|
|
|
|
|
|
|
# don't count linear angles for octahedral geometries |
|
577
|
0
|
0
|
0
|
|
|
|
$n++, $a_obs += $angle unless @neighbors > 2 and $angle > 150; |
|
578
|
|
|
|
|
|
|
} |
|
579
|
|
|
|
|
|
|
} |
|
580
|
0
|
0
|
|
|
|
|
$a_obs /= $n if $n; |
|
581
|
|
|
|
|
|
|
|
|
582
|
0
|
0
|
0
|
|
|
|
if ($n == 0) { |
|
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
# this condition is not described explicitly in the article |
|
584
|
0
|
|
|
|
|
|
$max_conns = 1; # undefined |
|
585
|
0
|
|
|
|
|
|
$confidence = 0; |
|
586
|
0
|
|
|
|
|
|
$n = 1; |
|
587
|
|
|
|
|
|
|
} elsif ($a_obs > 150) { |
|
588
|
0
|
|
|
|
|
|
$max_conns = 2; # linear |
|
589
|
0
|
|
|
|
|
|
$confidence = ($a_obs - 150) / 30; |
|
590
|
|
|
|
|
|
|
} elsif (120 > $a_obs and $a_obs >= 115) { |
|
591
|
0
|
|
|
|
|
|
$max_conns = 3; # trigonal |
|
592
|
0
|
|
|
|
|
|
$confidence = ($a_obs - 115) / 5; |
|
593
|
|
|
|
|
|
|
} elsif (150 > $a_obs and $a_obs >= 120) { |
|
594
|
0
|
|
|
|
|
|
$max_conns = 3; # trigonal |
|
595
|
0
|
|
|
|
|
|
$confidence = (150 - $a_obs) / 30; |
|
596
|
|
|
|
|
|
|
} elsif (109.5 > $a_obs and $a_obs >= 99) { |
|
597
|
0
|
|
|
|
|
|
$max_conns = 4; # tetrahedral |
|
598
|
0
|
|
|
|
|
|
$confidence = ($a_obs - 99) / 10.5; |
|
599
|
|
|
|
|
|
|
} elsif (115 > $a_obs and $a_obs >= 109.5) { |
|
600
|
0
|
|
|
|
|
|
$max_conns = 4; # tetrahedral |
|
601
|
0
|
|
|
|
|
|
$confidence = (115 - $a_obs) / 5.5; |
|
602
|
|
|
|
|
|
|
} elsif ($a_obs < 99) { |
|
603
|
0
|
|
|
|
|
|
$max_conns = 6; # octahedral |
|
604
|
0
|
|
|
|
|
|
$confidence = (99 - $a_obs) / 9; |
|
605
|
|
|
|
|
|
|
} else { |
|
606
|
0
|
|
|
|
|
|
confess("impossible coordination angle $a_obs!"); |
|
607
|
|
|
|
|
|
|
} |
|
608
|
|
|
|
|
|
|
|
|
609
|
0
|
|
|
|
|
|
$confidence *= 100 * $n; |
|
610
|
0
|
0
|
|
|
|
|
if ($Valences{$atom->symbol}) { |
|
611
|
0
|
|
|
|
|
|
$atom->attr("bond-find/valence", $Valences{$atom->symbol}[0]); |
|
612
|
|
|
|
|
|
|
} else { |
|
613
|
0
|
|
|
|
|
|
$atom->attr("bond-find/valence", $max_conns); |
|
614
|
|
|
|
|
|
|
} |
|
615
|
0
|
|
|
|
|
|
$atom->attr("bond-find/confidence", $confidence); |
|
616
|
0
|
|
|
|
|
|
$atom->attr("bond-find/max_conns", $max_conns); |
|
617
|
0
|
0
|
|
|
|
|
print "Atom $atom has CN $max_conns with a conf. of $confidence\n" |
|
618
|
|
|
|
|
|
|
if $DEBUG; |
|
619
|
|
|
|
|
|
|
} |
|
620
|
|
|
|
|
|
|
} |
|
621
|
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
sub resolve_conflicts { |
|
623
|
0
|
|
|
0
|
0
|
|
my ($mol) = @_; |
|
624
|
|
|
|
|
|
|
|
|
625
|
0
|
|
|
|
|
|
my $changes = 0; |
|
626
|
0
|
|
|
|
|
|
for my $atom ($mol->atoms) { |
|
627
|
0
|
|
|
|
|
|
my $valence = $atom->attr('bond-find/valence'); |
|
628
|
0
|
|
|
|
|
|
my $max_conns = $atom->attr('bond-find/max_conns'); |
|
629
|
0
|
|
|
|
|
|
my $confidence = $atom->attr('bond-find/confidence'); |
|
630
|
0
|
|
|
|
|
|
my $n_conns = $atom->bonds; |
|
631
|
|
|
|
|
|
|
|
|
632
|
0
|
|
|
|
|
|
my $n_bonds = 0; |
|
633
|
0
|
|
|
|
|
|
$n_bonds += $_->order for $atom->bonds; |
|
634
|
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
# this is a modification to avoid cummulative double bonds on |
|
636
|
|
|
|
|
|
|
# non linear carbon |
|
637
|
0
|
|
|
|
|
|
my $n_multiple_bonds = grep { $_->order > 1 } $atom->bonds; |
|
|
0
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
|
|
639
|
0
|
0
|
0
|
|
|
|
if ($n_conns > $valence) { |
|
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
640
|
0
|
|
|
|
|
|
my $next_valence = next_valence($atom->symbol, $valence); |
|
641
|
0
|
0
|
|
|
|
|
if ($next_valence) { |
|
642
|
0
|
0
|
|
|
|
|
print "increasing valence of $atom to $next_valence\n" |
|
643
|
|
|
|
|
|
|
if $DEBUG; |
|
644
|
0
|
|
|
|
|
|
$atom->attr('bond-find/valence', $next_valence); |
|
645
|
0
|
|
|
|
|
|
++$changes, redo; |
|
646
|
|
|
|
|
|
|
} else { |
|
647
|
0
|
|
|
|
|
|
warn "too many conns $n_conns to $atom with valence $valence\n"; |
|
648
|
0
|
|
|
|
|
|
next; |
|
649
|
|
|
|
|
|
|
} |
|
650
|
|
|
|
|
|
|
} elsif ($n_bonds > $valence |
|
651
|
|
|
|
|
|
|
or $n_multiple_bonds > 1 and $max_conns > 2 and $atom->symbol eq 'C' |
|
652
|
|
|
|
|
|
|
) { |
|
653
|
0
|
|
|
|
|
|
my $next_valence = next_valence($atom->symbol, $valence); |
|
654
|
0
|
0
|
|
|
|
|
if ($next_valence) { |
|
655
|
0
|
0
|
|
|
|
|
print "increasing valence of $atom to $next_valence\n" |
|
656
|
|
|
|
|
|
|
if $DEBUG; |
|
657
|
0
|
|
|
|
|
|
$atom->attr('bond-find/valence', $next_valence); |
|
658
|
0
|
|
|
|
|
|
++$changes, next; |
|
659
|
|
|
|
|
|
|
} |
|
660
|
0
|
|
|
|
|
|
my ($min_conf, $min_bond); |
|
661
|
0
|
|
|
|
|
|
for my $bond ($atom->bonds) { |
|
662
|
0
|
0
|
0
|
|
|
|
if ($bond->order > 1 and |
|
|
|
|
0
|
|
|
|
|
|
663
|
|
|
|
|
|
|
(not defined $min_conf |
|
664
|
|
|
|
|
|
|
or $bond->attr("bond-find/confidence") < $min_conf)) { |
|
665
|
0
|
|
|
|
|
|
$min_conf = $bond->attr("bond-find/confidence"); |
|
666
|
0
|
|
|
|
|
|
$min_bond = $bond; |
|
667
|
|
|
|
|
|
|
} |
|
668
|
|
|
|
|
|
|
} |
|
669
|
0
|
|
|
|
|
|
my $new_order = $min_bond->order - 1; |
|
670
|
0
|
|
|
|
|
|
$min_bond->order($new_order); |
|
671
|
0
|
0
|
|
|
|
|
print "Decreasing order of $min_bond to $new_order\n" if $DEBUG; |
|
672
|
|
|
|
|
|
|
#$min_bond->attr('bond-find/confidence', $min_conf*1.2+rand()); |
|
673
|
0
|
|
|
|
|
|
++$changes, next; |
|
674
|
|
|
|
|
|
|
} elsif ($n_bonds + $max_conns - $n_conns < $valence) { |
|
675
|
0
|
|
|
|
|
|
my ($min_conf, $min_bond); |
|
676
|
0
|
|
|
|
|
|
for my $bond ($atom->bonds) { |
|
677
|
0
|
0
|
0
|
|
|
|
if (($bond->order == 1 |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
678
|
|
|
|
|
|
|
or $atom->symbol =~ /^[CN]$/ and $bond->order == 2) and |
|
679
|
|
|
|
|
|
|
(not defined $min_conf |
|
680
|
|
|
|
|
|
|
or $bond->attr("bond-find/confidence") < $min_conf)) { |
|
681
|
0
|
|
|
|
|
|
$min_conf = $bond->attr("bond-find/confidence"); |
|
682
|
0
|
|
|
|
|
|
$min_bond = $bond; |
|
683
|
|
|
|
|
|
|
} |
|
684
|
|
|
|
|
|
|
} |
|
685
|
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
#print "\t($confidence, $min_conf)\n" if ($atom->id eq 'a1'); |
|
687
|
0
|
0
|
0
|
|
|
|
if ($min_bond and ($confidence > 95 or $min_conf < $confidence)) { |
|
|
|
|
0
|
|
|
|
|
|
688
|
|
|
|
|
|
|
#print "XXX: $atom\n"; |
|
689
|
0
|
|
|
|
|
|
my $new_order = $min_bond->order + 1; |
|
690
|
0
|
|
|
|
|
|
$min_bond->order($new_order); |
|
691
|
0
|
0
|
|
|
|
|
print "Increasing order of $min_bond to $new_order\n" if $DEBUG; |
|
692
|
|
|
|
|
|
|
#$min_bond->attr('bond-find/confidence', $min_conf*1.2+rand()); |
|
693
|
|
|
|
|
|
|
#$atom->attr('bond-find/confidence', $confidence / 1.5); |
|
694
|
|
|
|
|
|
|
} else { |
|
695
|
0
|
|
|
|
|
|
$max_conns++; |
|
696
|
0
|
|
|
|
|
|
$atom->attr("bond-find/max_conns", $max_conns); |
|
697
|
0
|
0
|
|
|
|
|
print "Increasing coord. num. of $atom to $max_conns\n" |
|
698
|
|
|
|
|
|
|
if $DEBUG; |
|
699
|
|
|
|
|
|
|
} |
|
700
|
0
|
|
|
|
|
|
++$changes, next; |
|
701
|
|
|
|
|
|
|
} |
|
702
|
|
|
|
|
|
|
} |
|
703
|
0
|
|
|
|
|
|
$changes; |
|
704
|
|
|
|
|
|
|
} |
|
705
|
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
sub next_valence { |
|
707
|
0
|
|
|
0
|
0
|
|
my ($symbol, $current) = @_; |
|
708
|
|
|
|
|
|
|
|
|
709
|
0
|
0
|
|
|
|
|
if ($Valences{$symbol}) { |
|
710
|
0
|
|
|
|
|
|
for my $v (@{$Valences{$symbol}}) { |
|
|
0
|
|
|
|
|
|
|
|
711
|
0
|
0
|
|
|
|
|
return $v if $v > $current; |
|
712
|
|
|
|
|
|
|
} |
|
713
|
|
|
|
|
|
|
} |
|
714
|
0
|
|
|
|
|
|
return undef; |
|
715
|
|
|
|
|
|
|
} |
|
716
|
|
|
|
|
|
|
|
|
717
|
|
|
|
|
|
|
1; |
|
718
|
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
=back |
|
720
|
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
=head1 VERSION |
|
722
|
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
0.23 |
|
724
|
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
=head1 TO DO |
|
726
|
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
Some future version should let the user specify the desired cutoffs, and |
|
728
|
|
|
|
|
|
|
not always create a bond but call a user-supplied function instead. This way |
|
729
|
|
|
|
|
|
|
these functions could be used for other purposes such as finding hydrogen bonds |
|
730
|
|
|
|
|
|
|
or neighbor lists. |
|
731
|
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
Add some tests. |
|
733
|
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
735
|
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
L, L, L, |
|
737
|
|
|
|
|
|
|
L. |
|
738
|
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
=head1 AUTHOR |
|
740
|
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
Ivan Tubert-BrohmanEitub@cpan.orgE |
|
742
|
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
The new C algorithm was loosely based on a suggestion by BrowserUK |
|
744
|
|
|
|
|
|
|
on perlmonks.org (L). |
|
745
|
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
=head1 COPYRIGHT |
|
747
|
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
Copyright (c) 2009 Ivan Tubert-Brohman. All rights reserved. This program is |
|
749
|
|
|
|
|
|
|
free software; you can redistribute it and/or modify it under the same terms as |
|
750
|
|
|
|
|
|
|
Perl itself. |
|
751
|
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
=cut |
|
753
|
|
|
|
|
|
|
|