File Coverage

blib/lib/Chemistry/InternalCoords/Builder.pm
Criterion Covered Total %
statement 21 67 31.3
branch 0 16 0.0
condition n/a
subroutine 7 12 58.3
pod n/a
total 28 95 29.4


line stmt bran cond sub pod time code
1             package Chemistry::InternalCoords::Builder;
2              
3             $VERSION = '0.18';
4              
5 1     1   272336 use strict;
  1         3  
  1         805  
6 1     1   11 use warnings;
  1         2  
  1         328  
7             #use diagnostics;
8              
9 1     1   7440 use Chemistry::Bond::Find 'find_bonds';
  1         19029  
  1         127  
10 1     1   1223 use Chemistry::Canonicalize 'canonicalize';
  1         153471  
  1         138  
11 1     1   1807 use Chemistry::InternalCoords;
  1         3  
  1         47  
12 1     1   7 use List::Util qw(first reduce);
  1         2  
  1         157  
13 1     1   7 use base 'Exporter';
  1         2  
  1         2873  
14              
15             our @EXPORT_OK = qw(build_zmat);
16             our %EXPORT_TAGS = ( all => \@EXPORT_OK );
17              
18              
19             =head1 NAME
20              
21             Chemistry::InternalCoords::Builder - Build a Z-matrix from cartesian
22             coordinates
23              
24             =head1 SYNOPSIS
25              
26             use Chemistry::InternalCoords::Builder 'build_zmat';
27              
28             # $mol is a Chemistry::Mol object
29             build_zmat($mol);
30              
31             # don't change the atom order!
32             build_zmat($mol, bfs => 0);
33              
34             =head1 DESCRIPTION
35              
36             This module builds a Z-matrix from the cartesian coordinates of a molecule,
37             making sure that atoms are defined in a way that allows for efficient structure
38             optimizations and Monte Carlo sampling.
39              
40             By default, the algorithm tries to start at the center of the molecule and
41             builds outward in a breadth-first fashion. Improper dihedrals are used to
42             ensure clean rotation of groups without distortion. All distance and angle
43             references use real bonds and bond angles where possible (the exception being
44             disconnected structures).
45              
46             This module is part of the PerlMol project, L.
47              
48             =head1 FUNCTIONS
49              
50             These functions may be exported, although nothing is exported by default.
51             To export all functions, use the ":all" tag.
52              
53             =over
54              
55             =item build_zmat($mol, %options)
56              
57             Build a Z-matrix from the cartesian coordinates of the molecule. Side effect
58             warning: by default, this function modifies the molecule heavily! First, it
59             finds the bonds if there are no bonds defined already (for example, if the
60             structure came from and XYZ file with no bond information). Second, it
61             canonicalizes the molecule, as a means of finding the "topological center".
62             Third, it builds the Z-matrix using a breadth-first search. Fourth, it sorts
63             the atoms in the molecule in the order that they were defined in the Z-matrix.
64              
65             Options:
66              
67             =over
68              
69             =item bfs
70              
71             Default: true. Follow the procedure described above. If bfs is false, then
72             the atom order is not modified (that is, the atoms are added sequentially in
73             the order in which they appear in the connection table, instead of using the
74             breadth-first search).
75              
76             =item sort
77              
78             Default: true. Do the canonicalization step as described above. This option
79             only applies when bfs => 1, otherwise it has no effect. If false and bfs => 1,
80             the breadth-first search is done, but starting at the first atom in the
81             connection table.
82              
83             =back
84              
85             =cut
86              
87             sub build_zmat {
88 0     0     my ($mol, %opts) = @_;
89 0           %opts = (sort => 1, bfs => 1, %opts);
90              
91 0 0         find_bonds($mol) unless $mol->bonds;
92              
93 0 0         if ($opts{bfs}) {
94 0           my @atoms = $mol->atoms;
95 0 0         if ($opts{sort}) {
96 0           canonicalize($mol);
97 0           @atoms = sort {
98 0           $b->attr("canon/class") <=> $a->attr("canon/class")
99             } @atoms;
100             }
101 0           my $ats = [];
102 0           for my $atom (@atoms) {
103 0 0         next if $atom->attr("zmat/index");
104 0           zmat_bfs($mol, $atom, $ats);
105             }
106             $mol->sort_atoms( sub {
107 0     0     $_[0]->attr("zmat/index") <=> $_[1]->attr("zmat/index")
108 0           });
109             } else {
110 0           zmat_seq($mol);
111             }
112             }
113              
114             sub zmat_seq {
115 0     0     my ($mol) = @_;
116 0           my $i = 0;
117 0           my @atoms;
118 0           for my $atom ($mol->atoms) {
119 0           $atom->attr("zmat/index", ++$i);
120 0           add_ic($atom, \@atoms);
121 0           push @atoms, $atom;
122             }
123             }
124              
125             sub zmat_bfs {
126 0     0     my ($mol, $origin, $atoms) = @_;
127 0           my @q = $origin;
128 0           my $i = @$atoms;
129 0           push @$atoms, $origin;
130 0           $origin->attr("zmat/index", ++$i);
131 0           add_ic($origin, $atoms);
132 0           while (my $atom = shift @q) {
133             #print "at $atom with $i\n";
134 0           for my $nei (sorted_neighbors($atom)) {
135 0 0         unless ($nei->attr("zmat/index")) {
136 0           $nei->attr("zmat/index", ++$i);
137 0           add_ic($nei, $atoms);
138 0           push @q, $nei;
139 0           push @$atoms, $nei;
140             }
141             }
142             }
143 0           $atoms;
144             }
145              
146             # $atoms is the list of atoms that have been added so far
147             sub add_ic {
148 0     0     my ($atom, $atoms) = @_;
149 0           my $ic;
150 0           my $n = $atom->attr("zmat/index");
151 0 0         if ($n == 1) {
    0          
    0          
152 0           $ic = Chemistry::InternalCoords->new($atom);
153             } elsif ($n == 2) {
154 0           $ic = Chemistry::InternalCoords->new($atom, find_length($atom, $atoms));
155             } elsif ($n == 3) {
156 0           $ic = Chemistry::InternalCoords->new(
157             $atom, find_angle($atom, $atoms));
158             } else {
159 0           $ic = Chemistry::InternalCoords->new(
160             $atom, find_dihedral($atom, $atoms));
161             }
162 0           $atom->internal_coords($ic);
163             }
164              
165             # Choose a good length reference for $atom
166             sub find_length {
167             my ($atom, $atoms) = @_;
168             my $ref = first { $_->attr("zmat/index") } $atom->neighbors;
169             unless ($ref) {
170             $ref = ${
171             reduce { $a->[0] < $b->[0] ? $a : $b }
172             map { [$atom->distance($_)] }
173             grep { $_ ne $atom } @$atoms;
174             }[1];
175             }
176             ($ref, scalar $atom->distance($ref));
177             }
178              
179             # Choose a good angle (and length) reference for $atom
180             sub find_angle {
181             my ($atom, $atoms) = @_;
182             my ($len_ref, $len_val) = find_length(@_);
183             my $ang_ref =
184             first {
185             $_->attr("zmat/index") && $_ ne $len_ref && $_ ne $atom
186             } $len_ref->neighbors($atom), @$atoms;
187             my $ang_val = $atom->angle_deg($len_ref, $ang_ref);
188             ($len_ref, $len_val, $ang_ref, $ang_val);
189             }
190              
191             # Choose a good dihedral (and angle and length) reference for $atom
192             sub find_dihedral {
193             my ($atom, $atoms) = @_;
194             my ($len_ref, $len_val, $ang_ref, $ang_val) = find_angle(@_);
195             my $dih_ref =
196             first {
197             $_->attr("zmat/index") && $_ ne $len_ref
198             && $_ ne $ang_ref && $_ ne $atom
199             } $len_ref->neighbors($atom), $ang_ref->neighbors($len_ref), @$atoms;
200             my $dih_val = $atom->dihedral_deg($len_ref, $ang_ref, $dih_ref);
201             ($len_ref, $len_val, $ang_ref, $ang_val, $dih_ref, $dih_val);
202             }
203              
204             sub sorted_neighbors {
205             my ($atom) = @_;
206             my @bn = $atom->neighbors;
207             @bn = sort {
208             $b->attr("canon/class") <=> $a->attr("canon/class")
209             } @bn;
210             @bn;
211             }
212              
213             1;
214              
215             =back
216              
217             =head1 VERSION
218              
219             0.18
220              
221             =head1 CAVEATS
222              
223             This version may not work properly for big molecules, because the
224             canonicalization step has a size limit.
225              
226             =head1 TO DO
227              
228             Some improvements for handling disconnected structures, such as making sure
229             that the intermolecular distance is short.
230              
231             Allowing more control over how much the molecule will be modified: sort or not,
232             canonicalize or not...
233              
234             =head1 SEE ALSO
235              
236             L, L, L,
237             L, L,
238             L.
239              
240             =head1 AUTHOR
241              
242             Ivan Tubert-Brohman Eitub@cpan.orgE
243              
244             =head1 COPYRIGHT
245              
246             Copyright (c) 2004 Ivan Tubert-Brohman. All rights reserved. This program is
247             free software; you can redistribute it and/or modify it under the same terms as
248             Perl itself.
249              
250             =cut
251