File Coverage

blib/lib/Chemistry/InternalCoords/Builder.pm

Criterion	Covered	Total	%
statement	21	67	31.3
branch	0	16	0.0
condition			n/a
subroutine	7	12	58.3
pod			n/a
total	28	95	29.4

line	stmt	bran	sub	time	code
1					package Chemistry::InternalCoords::Builder;
2
3					$VERSION = '0.18';
4
5	1		1	272336	use strict;
	1			3
	1			805
6	1		1	11	use warnings;
	1			2
	1			328
7					#use diagnostics;
8
9	1		1	7440	use Chemistry::Bond::Find 'find_bonds';
	1			19029
	1			127
10	1		1	1223	use Chemistry::Canonicalize 'canonicalize';
	1			153471
	1			138
11	1		1	1807	use Chemistry::InternalCoords;
	1			3
	1			47
12	1		1	7	use List::Util qw(first reduce);
	1			2
	1			157
13	1		1	7	use base 'Exporter';
	1			2
	1			2873
14
15					our @EXPORT_OK = qw(build_zmat);
16					our %EXPORT_TAGS = ( all => \@EXPORT_OK );
17
18
19					=head1 NAME
20
21					Chemistry::InternalCoords::Builder - Build a Z-matrix from cartesian
22					coordinates
23
24					=head1 SYNOPSIS
25
26					use Chemistry::InternalCoords::Builder 'build_zmat';
27
28					# $mol is a Chemistry::Mol object
29					build_zmat($mol);
30
31					# don't change the atom order!
32					build_zmat($mol, bfs => 0);
33
34					=head1 DESCRIPTION
35
36					This module builds a Z-matrix from the cartesian coordinates of a molecule,
37					making sure that atoms are defined in a way that allows for efficient structure
38					optimizations and Monte Carlo sampling.
39
40					By default, the algorithm tries to start at the center of the molecule and
41					builds outward in a breadth-first fashion. Improper dihedrals are used to
42					ensure clean rotation of groups without distortion. All distance and angle
43					references use real bonds and bond angles where possible (the exception being
44					disconnected structures).
45
46					This module is part of the PerlMol project, L.
47
48					=head1 FUNCTIONS
49
50					These functions may be exported, although nothing is exported by default.
51					To export all functions, use the ":all" tag.
52
53					=over
54
55					=item build_zmat($mol, %options)
56
57					Build a Z-matrix from the cartesian coordinates of the molecule. Side effect
58					warning: by default, this function modifies the molecule heavily! First, it
59					finds the bonds if there are no bonds defined already (for example, if the
60					structure came from and XYZ file with no bond information). Second, it
61					canonicalizes the molecule, as a means of finding the "topological center".
62					Third, it builds the Z-matrix using a breadth-first search. Fourth, it sorts
63					the atoms in the molecule in the order that they were defined in the Z-matrix.
64
65					Options:
66
67					=over
68
69					=item bfs
70
71					Default: true. Follow the procedure described above. If bfs is false, then
72					the atom order is not modified (that is, the atoms are added sequentially in
73					the order in which they appear in the connection table, instead of using the
74					breadth-first search).
75
76					=item sort
77
78					Default: true. Do the canonicalization step as described above. This option
79					only applies when bfs => 1, otherwise it has no effect. If false and bfs => 1,
80					the breadth-first search is done, but starting at the first atom in the
81					connection table.
82
83					=back
84
85					=cut
86
87					sub build_zmat {
88	0		0		my ($mol, %opts) = @_;
89	0				%opts = (sort => 1, bfs => 1, %opts);
90
91	0	0			find_bonds($mol) unless $mol->bonds;
92
93	0	0			if ($opts{bfs}) {
94	0				my @atoms = $mol->atoms;
95	0	0			if ($opts{sort}) {
96	0				canonicalize($mol);
97	0				@atoms = sort {
98	0				$b->attr("canon/class") <=> $a->attr("canon/class")
99					} @atoms;
100					}
101	0				my $ats = [];
102	0				for my $atom (@atoms) {
103	0	0			next if $atom->attr("zmat/index");
104	0				zmat_bfs($mol, $atom, $ats);
105					}
106					$mol->sort_atoms( sub {
107	0		0		$_[0]->attr("zmat/index") <=> $_[1]->attr("zmat/index")
108	0				});
109					} else {
110	0				zmat_seq($mol);
111					}
112					}
113
114					sub zmat_seq {
115	0		0		my ($mol) = @_;
116	0				my $i = 0;
117	0				my @atoms;
118	0				for my $atom ($mol->atoms) {
119	0				$atom->attr("zmat/index", ++$i);
120	0				add_ic($atom, \@atoms);
121	0				push @atoms, $atom;
122					}
123					}
124
125					sub zmat_bfs {
126	0		0		my ($mol, $origin, $atoms) = @_;
127	0				my @q = $origin;
128	0				my $i = @$atoms;
129	0				push @$atoms, $origin;
130	0				$origin->attr("zmat/index", ++$i);
131	0				add_ic($origin, $atoms);
132	0				while (my $atom = shift @q) {
133					#print "at $atom with $i\n";
134	0				for my $nei (sorted_neighbors($atom)) {
135	0	0			unless ($nei->attr("zmat/index")) {
136	0				$nei->attr("zmat/index", ++$i);
137	0				add_ic($nei, $atoms);
138	0				push @q, $nei;
139	0				push @$atoms, $nei;
140					}
141					}
142					}
143	0				$atoms;
144					}
145
146					# $atoms is the list of atoms that have been added so far
147					sub add_ic {
148	0		0		my ($atom, $atoms) = @_;
149	0				my $ic;
150	0				my $n = $atom->attr("zmat/index");
151	0	0			if ($n == 1) {
		0
		0
152	0				$ic = Chemistry::InternalCoords->new($atom);
153					} elsif ($n == 2) {
154	0				$ic = Chemistry::InternalCoords->new($atom, find_length($atom, $atoms));
155					} elsif ($n == 3) {
156	0				$ic = Chemistry::InternalCoords->new(
157					$atom, find_angle($atom, $atoms));
158					} else {
159	0				$ic = Chemistry::InternalCoords->new(
160					$atom, find_dihedral($atom, $atoms));
161					}
162	0				$atom->internal_coords($ic);
163					}
164
165					# Choose a good length reference for $atom
166					sub find_length {
167					my ($atom, $atoms) = @_;
168					my $ref = first { $_->attr("zmat/index") } $atom->neighbors;
169					unless ($ref) {
170					$ref = ${
171					reduce { $a->[0] < $b->[0] ? $a : $b }
172					map { [$atom->distance($_)] }
173					grep { $_ ne $atom } @$atoms;
174					}[1];
175					}
176					($ref, scalar $atom->distance($ref));
177					}
178
179					# Choose a good angle (and length) reference for $atom
180					sub find_angle {
181					my ($atom, $atoms) = @_;
182					my ($len_ref, $len_val) = find_length(@_);
183					my $ang_ref =
184					first {
185					$_->attr("zmat/index") && $_ ne $len_ref && $_ ne $atom
186					} $len_ref->neighbors($atom), @$atoms;
187					my $ang_val = $atom->angle_deg($len_ref, $ang_ref);
188					($len_ref, $len_val, $ang_ref, $ang_val);
189					}
190
191					# Choose a good dihedral (and angle and length) reference for $atom
192					sub find_dihedral {
193					my ($atom, $atoms) = @_;
194					my ($len_ref, $len_val, $ang_ref, $ang_val) = find_angle(@_);
195					my $dih_ref =
196					first {
197					$_->attr("zmat/index") && $_ ne $len_ref
198					&& $_ ne $ang_ref && $_ ne $atom
199					} $len_ref->neighbors($atom), $ang_ref->neighbors($len_ref), @$atoms;
200					my $dih_val = $atom->dihedral_deg($len_ref, $ang_ref, $dih_ref);
201					($len_ref, $len_val, $ang_ref, $ang_val, $dih_ref, $dih_val);
202					}
203
204					sub sorted_neighbors {
205					my ($atom) = @_;
206					my @bn = $atom->neighbors;
207					@bn = sort {
208					$b->attr("canon/class") <=> $a->attr("canon/class")
209					} @bn;
210					@bn;
211					}
212
213					1;
214
215					=back
216
217					=head1 VERSION
218
219					0.18
220
221					=head1 CAVEATS
222
223					This version may not work properly for big molecules, because the
224					canonicalization step has a size limit.
225
226					=head1 TO DO
227
228					Some improvements for handling disconnected structures, such as making sure
229					that the intermolecular distance is short.
230
231					Allowing more control over how much the molecule will be modified: sort or not,
232					canonicalize or not...
233
234					=head1 SEE ALSO
235
236					L, L, L,
237					L, L,
238					L.
239
240					=head1 AUTHOR
241
242					Ivan Tubert-Brohman Eitub@cpan.orgE
243
244					=head1 COPYRIGHT
245
246					Copyright (c) 2004 Ivan Tubert-Brohman. All rights reserved. This program is
247					free software; you can redistribute it and/or modify it under the same terms as
248					Perl itself.
249
250					=cut
251