File Coverage

blib/lib/Chemistry/File/Mopac.pm
Criterion Covered Total %
statement 16 18 88.8
branch n/a
condition n/a
subroutine 6 6 100.0
pod n/a
total 22 24 91.6


line stmt bran cond sub pod time code
1             package Chemistry::File::Mopac;
2              
3             $VERSION = '0.15';
4             # $Id: Mopac.pm,v 1.5 2004/07/02 18:18:23 itubert Exp $
5              
6 1     1   43519 use 5.006;
  1         37  
  1         144  
7 1     1   6 use strict;
  1         2  
  1         57  
8 1     1   5 use warnings;
  1         8  
  1         39  
9 1     1   7 use base "Chemistry::File";
  1         1  
  1         1576  
10 1     1   44031 use Chemistry::Mol 0.25;
  1         52476  
  1         70  
11 1     1   577 use Chemistry::InternalCoords;
  0            
  0            
12             use List::Util 'first';
13             use Carp;
14              
15             =head1 NAME
16              
17             Chemistry::File::Mopac - MOPAC 6 input file reader/writer
18              
19             =head1 SYNOPSIS
20              
21             use Chemistry::File::Mopac;
22              
23             # read a MOPAC file
24             my $mol = Chemistry::Mol->read('file.mop');
25              
26             # write a MOPAC file using cartesian coordinates
27             $mol->write('file.mop', coords => 'cartesian');
28              
29             # now with internal coordinates
30             $mol->write('file.mop', coords => 'internal');
31              
32             # rebuild the Z-matrix from scratch while we are at it
33             $mol->write('file.mop', rebuild => 1);
34              
35             =cut
36              
37             =head1 DESCRIPTION
38              
39             This module reads and writes MOPAC 6 input files. It can handle both internal
40             coordinates and cartesian coordinates. It also extracts molecules from summary
41             files, defined as those files that match /SUMMARY OF/ in the third line.
42             Perhaps a future version will extract additional information such as the energy
43             and dipole from the summary file.
44              
45             This module registers the C format with Chemistry::Mol. For detection
46             purposes, it assumes that filenames ending in .mop or .zt have the Mopac
47             format, as well as files whose first line matches /am1|pm3|mndo|mdg|pdg/i
48             (this may change in the future).
49              
50             When the module reads an input file into $mol, it puts the keywords (usually
51             the first line of the file) in $mol->attr("mopac/keywords"), the comments
52             (usually everything else on the first three lines) in
53             $mol->attr("mopac/comments") and $mol->name, and the internal coordinates for
54             each atom in $atom->internal_coords.
55              
56             When writing, the kind of coordinates used depend on the C option, as
57             shown in the SYNOPSIS. Internal coordinates are used by default. If the
58             molecule has no internal coordinates defined or the rebuild option is set,
59             the build_zmat function from Chemistry::InternalCoords::Builder is used to
60             renumber the atoms and build the Z-matrix from scratch.
61              
62             =cut
63              
64             Chemistry::Mol->register_format("mop");
65              
66             sub parse_string {
67             my ($class, $string, %opts) = @_;
68              
69             my $mol_class = $opts{mol_class} || "Chemistry::Mol";
70             my $atom_class = $opts{atom_class} || $mol_class->atom_class;
71             my $bond_class = $opts{bond_class} || $mol_class->bond_class;
72              
73             my $mol = $mol_class->new();
74              
75             my @lines = split "\n", $string;
76             local $_;
77              
78             # do we have a summary file?
79             if ($lines[2] =~ /SUMMARY OF/) {
80             # skip everything until FINAL GEOMETRY OBTAINED
81             while (defined ($_ = shift @lines)) {
82             last if /FINAL GEOMETRY OBTAINED/;
83             }
84             }
85              
86             my @header = splice @lines, 0, 3;
87             my @keys;
88              
89             # crazy mopac header extension rules
90             push @keys, $header[0];
91             if ($keys[0] =~ /&/) {
92             push @keys, $header[1];
93             if ($keys[1] =~ /&/) {
94             push @keys, $header[2];
95             }
96             } elsif ($keys[0] =~ /(\+\s|\s\+)/) {
97             push @keys, $header[1];
98             push @header, shift @lines;
99             if ($keys[1] =~ /(\+\s|\s\+)/) {
100             push @keys, $header[2];
101             push @header, shift @lines;
102             }
103             }
104              
105             $mol->attr("mop/keywords" => join "\n", @keys);
106             my $comment = join "\n", @header[@keys..$#header];
107             $comment =~ s/\s+$//;
108             $mol->attr("mopac/comments" => $comment);
109             $comment =~ s/\n/ /g;
110             $mol->name($comment);
111            
112             # read coords
113             my @coords;
114             for (@lines) {
115             # Sample line below
116             # O 1.232010 1 128.812332 1 274.372818 1 3 2 1
117             last if /^\s*$/; #blank line
118             push @coords, [split];
119             }
120            
121             # note: according to the MOPAC6 manual, triatomics must always use
122             # internal coordinates; molecules that don't specify connectivity
123             # (columns 7-9) use cartesian coordinates. I use column 8 for testing
124             # because column 7 is sometimes used for the partial charge, adding to
125             # the confusion. I assume that diatomics are always internal as well.
126             if (@coords <= 3 or first {defined $_->[8]} @coords) {
127             read_internal_coords($mol, @coords);
128             } else { # Cartesian coords
129             read_cartesian_coords($mol, @coords);
130             }
131              
132             return $mol;
133             }
134              
135             sub read_internal_coords {
136             my ($mol, @coords) = @_;
137              
138             my $i = 0; # atom index
139              
140             for my $coord (@coords) {
141             $i++;
142             my ($symbol, $len_val, $len_opt, $ang_val, $ang_opt,
143             $dih_val, $dih_opt, $len_ref, $ang_ref, $dih_ref) = @$coord;
144              
145             # implicit links for the second and third atoms
146             $len_ref ||= 1 if $i == 2;
147             if ($i == 3) {
148             $len_ref ||= 2;
149             $ang_ref = $len_ref == 1 ? 2 : 1;
150             }
151              
152             my $atom = $mol->new_atom(symbol => $symbol);
153             my $ic = Chemistry::InternalCoords->new($atom,
154             $len_ref, $len_val,
155             $ang_ref, $ang_val,
156             $dih_ref, $dih_val);
157             $atom->internal_coords($ic);
158             $ic->add_cartesians;
159             }
160             }
161              
162             sub read_cartesian_coords {
163             my ($mol, @coords) = @_;
164             for my $coord (@coords) {
165             my ($symbol, $x_val, $x_opt, $y_val, $y_opt,
166             $z_val, $z_opt) = @$coord;
167             my $atom = $mol->new_atom(symbol => $symbol,
168             coords => [$x_val, $y_val, $z_val]);
169             }
170             }
171              
172              
173             sub file_is {
174             my ($class, $fname) = @_;
175            
176             return 1 if $fname =~ /\.(?:mop|zt)$/i;
177              
178             open F, $fname or croak "Could not open file $fname";
179            
180             my $line = ;
181             close F;
182             return 1 if $line =~ /am1|pm3|mndo|mdg|pdg/i;
183             return 0;
184             }
185              
186             sub name_is {
187             my ($class, $fname) = @_;
188             $fname =~ /\.(?:mop|zt)$/i;
189             }
190              
191             sub write_string {
192             my ($class, $mol, %opts) = @_;
193             %opts = (coords => "internal", rebuild => 0, %opts);
194             my $ret = $class->format_header($mol);
195             if ($opts{coords} eq 'cartesian') {
196             $ret .= $class->format_line_cart($_) for $mol->atoms;
197             } else {
198             if ($opts{rebuild} or ! $mol->atoms(1)->internal_coords ) {
199             require Chemistry::InternalCoords::Builder;
200             Chemistry::InternalCoords::Builder::build_zmat($mol);
201             }
202             my %index;
203             @index{$mol->atoms} = (1 .. $mol->atoms);
204             $ret .= $class->format_line_ic($_, \%index) for $mol->atoms;
205             }
206             $ret;
207             }
208              
209              
210             sub format_header {
211             my ($class, $mol) = @_;
212             my $ret = ($mol->attr("mop/keywords") || '') . "\n";
213             my $name = $mol->name || '';
214             $name =~ s/\n/ /g;
215             $name = substr $name, 0, 80;
216             $ret .= "\n" . $name . "\n";
217             $ret;
218             }
219              
220             sub format_line_ic {
221             my ($class, $atom, $index) = @_;
222             my $ic = $atom->internal_coords;
223             my ($len_ref, $len_val) = $ic->distance;
224             my ($ang_ref, $ang_val) = $ic->angle;
225             my ($dih_ref, $dih_val) = $ic->dihedral;
226             my $len_idx = $index->{$len_ref||0} || 0;
227             my $ang_idx = $index->{$ang_ref||0} || 0;
228             my $dih_idx = $index->{$dih_ref||0} || 0;
229             no warnings 'uninitialized';
230             sprintf "%-2s %8.3f %1d %8.3f %1d %8.3f %1d %3d %3d %3d\n",
231             $atom->symbol,
232             $len_val, $len_idx ? 1 : 0,
233             $ang_val, $ang_idx ? 1 : 0,
234             $dih_val, $dih_idx ? 1 : 0,
235             $len_idx, $ang_idx, $dih_idx ;
236             }
237              
238             sub format_line_cart {
239             my ($class, $atom) = @_;
240             my ($x, $y, $z) = $atom->coords->array;
241             sprintf "%-2s %8.3f 1 %8.3f 1 %8.3f 1\n",
242             $atom->symbol,
243             $atom->coords->array;
244             }
245              
246             1;
247              
248             =head1 TO DO
249              
250             When writing a Mopac file, this version marks all coordinates as variable
251             (for the purpose of geometry optimization by Mopac). A future version should
252             have more flexibility.
253              
254             =head1 VERSION
255              
256             0.15
257              
258             =head1 SEE ALSO
259              
260             L, L, L,
261             L, L.
262              
263             =head1 AUTHOR
264              
265             Ivan Tubert-Brohman
266              
267             =head1 COPYRIGHT
268              
269             Copyright (c) 2004 Ivan Tubert. All rights reserved. This program is free
270             software; you can redistribute it and/or modify it under the same terms as
271             Perl itself.
272              
273             =cut
274