File Coverage

blib/lib/Chemistry/ESPT/Glog.pm
Criterion Covered Total %
statement 48 245 19.5
branch 0 108 0.0
condition 1 30 3.3
subroutine 5 7 71.4
pod 2 2 100.0
total 56 392 14.2


line stmt bran cond sub pod time code
1             package Chemistry::ESPT::Glog;
2              
3 1     1   23648 use base qw(Chemistry::ESPT::ESSfile);
  1         3  
  1         623  
4 1     1   1318 use Chemistry::ESPT::Glib 0.01;
  1         27  
  1         78  
5 1     1   6 use strict;
  1         2  
  1         29  
6 1     1   6 use warnings;
  1         1  
  1         3141  
7              
8             our $VERSION = '0.08';
9              
10             =head1 NAME
11              
12             Chemistry::ESPT::Glog - Gaussian log file object.
13              
14             =head1 SYNOPSIS
15              
16             use Chemistry::ESPT::Glog;
17              
18             my $log = Chemistry::ESPT::Glog->new();
19              
20             =head1 DESCRIPTION
21              
22             This module provides methods to quickly access data contained in a Gaussian log file.
23             Guassian log files can only be read currently.
24              
25             =begin comment
26              
27             ### Version History ###
28             0.01 digest closed-shell DFT log files
29             0.02 adjust total energy for Opt/Freq runs
30             0.03 added TPSS functional, fixed issue with large negative eigenvalues,
31             store all PG, E(elec), Eigenvalues, MO symm, and Occupations values,
32             get method for 1 & 2D arrays
33             0.04 fixed Version regexp to handle GDV
34             0.05 set MO symmetry labels if PG = C(1)
35             0.06 Extract & Electronic State, consolidated get & get2D methods,
36             Debug options, fixed MOsymm regexp for D(*H) species, fixed G98
37             Total MO bug
38             0.07 Switched to ESPT namespace, total cpu times, fix for * notation
39             in basis sets, route parsing for theory/basis syntax, enumerate
40             the * & ** basis notations, "keyword(option, option)" keywords
41             properly split in the route parser, handle + in revision numbers,
42             use Glib, thermal corrections to E, H, and G; Delta G(solv), complete flag,
43             extensive perldoc, optimized flag, saddle-point order,
44             0.08 Updated S**2 regex,
45              
46             ### To Do ###
47             -enable/disable storage of all reoccuring data values
48             -store only most recent data from reoccuring data by default
49             -TS vector decomposition
50             -Improved handling of % commands: place into a hash by splitting over =
51             this will keep memory useage down and simplify the regexs
52             -More info for Gen basis sets
53             -Handle non #p log files
54             -Internal coordinates
55             -Multi-job Link1 log files
56             -Handle RO(Theory)
57             -Nonstd routes
58             -Handle correlation method energies properly
59             -Digest scan data
60              
61             =end comment
62              
63             =head1 ATTRIBUTES
64              
65             All attributes are currently read-only and get populated by reading the assigned ESS
66             file. Attribute values
67             are accessible through the B<$Glog-Eget()> method.
68              
69             =over 15
70              
71             =item BASISLABELS
72              
73             Rank two tensor containing the labels for each basis function.
74              
75             =item C
76              
77             A rank three tensor containing the NBASIS x NBASIS coefficient matrices. The coefficients
78             correspond to Alpha or Beta depending upon what spin was passesd to
79             B<$Glog-Eanalyze()>.
80              
81             =item COMPILE
82              
83             Architecture for which the employed version of Gaussian was compiled.
84              
85             =item COMPILEDATE
86              
87             Date when the employed version of Gaussian was compiled.
88              
89             =item EELEC
90              
91             Electronic energy for the theory level employed.
92              
93             =item ESCF
94              
95             Array of SCF energies. This will be either the Hartree-Fock or the DFT energy.
96              
97             =item EIGEN
98              
99             A rank two tensor containing the eigenvalues. The eigenvalues correspond to Alpha or
100             Beta depending upon what spin was passesd to B<$Glog-Eanalyze()>.
101              
102             =item ETHERM
103              
104             Thermal corrections to energy.
105              
106             =item EZPE
107              
108             Current zero-point energy.
109              
110             =item FUNCTIONAL
111              
112             String containing the DFT functional utlized in this job.
113              
114             =item GSOLV
115              
116             Current Delta G of solvation.
117              
118             =item GTHERM
119              
120             Thermal corrections to G.
121              
122             =item HOMO
123              
124             Number corresponding to the highest occupied molecular orbital. The value corresponds
125             to either Alpha or Beta electrons depending upon what spin was passesd to
126             B<$Glog-Eanalyze()>.
127              
128             =item HTHERM
129              
130             Thermal corrections to H.
131              
132             =item KEYWORDS
133              
134             Array containing Gaussian keywords used in this job.
135              
136             =item LINK0
137              
138             Array containing the Gaussian Link0 commands. Only the value passed to the
139             Link0 command is stored. This data will be accessible via a Link0 method in future
140             releases. The contents of the array positions are as follows:
141              
142             =back
143              
144             =over 15
145              
146             =over 5
147              
148             =item 0
149              
150             %nproc or %nprocshared
151              
152             =item 1
153              
154             %mem
155              
156             =item 2
157              
158             %chk
159              
160             =item 3
161              
162             %subst
163              
164             =item 4
165              
166             %nproclinda or %lindaworkers
167              
168             =item 5
169              
170             %save - stored as 1 if present, 0 otherwise
171              
172             =item 6
173              
174             %nosave - stored as 1 if present, 0 otherwise
175              
176             =item 7
177              
178             %kjob
179              
180             =item 8
181              
182             %rwf
183              
184             =item 9
185              
186             %int
187              
188             =item 10
189              
190             %d2e
191              
192             =back
193              
194             =back
195              
196             =over 15
197              
198             =item MOSYMM
199              
200             A rank two tensor containing the symmmetry labels for each molecular orbital.
201              
202             =item NCARTESIAN
203              
204             Current number of Cartesian basis functions.
205              
206             =item NPRIMITIVE
207              
208             Current number of primitive Guassians in the basis set.
209              
210             =item OCC
211              
212             Rank two tensor containing the molecular orbital occupations.
213              
214             =item OPTIMIZED
215              
216             Flag indicating successful optimization (1). Defaults to 0.
217              
218             =item PG
219              
220             Array of molecular point group values.
221              
222             =item REVISION
223              
224             Gaussian revision label.
225              
226             =item ROUTE
227              
228             Gaussian route line
229              
230             =item RUNTIME
231              
232             Date when the calculation was run.
233              
234             =item SADDLEPOINT
235              
236             Current Saddle-point order. Ground states are order 0 and transition states are order 1.
237              
238             =item SSQUARED
239              
240             Array of expectation values.
241              
242             =item VERSION
243              
244             Gaussian version.
245              
246             =back
247              
248             =head1 METHODS
249              
250             Method parameters denoted in [] are optional.
251              
252             =over 15
253              
254             =item B<$log-Enew()>
255              
256             Creates a new Glog object
257              
258             =cut
259              
260             ## the object constructor **
261              
262             sub new {
263 1     1 1 12 my $invocant = shift;
264 1   33     10 my $class = ref($invocant) || $invocant;
265 1         11 my $log = Chemistry::ESPT::ESSfile->new();
266              
267 1         8 $log->{TYPE} = "log";
268              
269             # program info
270 1         3 $log->{PROGRAM} = "Gaussian";
271 1         2 $log->{VERSION} = undef;
272 1         33 $log->{REVISION} = undef;
273 1         3 $log->{COMPILE} = undef;
274 1         9 $log->{COMPILEDATE} = undef;
275            
276             # Link 0 & Route commands
277 1         3 $log->{LINK0} = [];
278 1         3 $log->{KEYWORDS} = [];
279 1         2 $log->{ROUTE} = undef;
280              
281             # calc info
282 1         4 $log->{FUNCTIONAL} = undef;
283 1         2 $log->{NPRIMITIVE} = undef;
284 1         3 $log->{NCARTESIAN} = undef;
285 1         2 $log->{OPTIMIZED} = undef; # flag indicating successful optimization
286 1         5 $log->{RUNTIME} = undef;
287            
288             # molecular info
289 1         3 $log->{BASISLABELS} = [];
290 1         3 $log->{C} = []; # coefficient matrix
291 1         2 $log->{EELEC} = undef; # electronic energy for the current method
292 1         3 $log->{ETHERM} = undef; # Thermal corrections to E
293 1         2 $log->{EIGEN} = [];
294 1         3 $log->{EINFO} = "E(elec)"; # total energy description
295 1         2 $log->{ESCF} = []; # SCF electronic energy
296 1         3 $log->{EZPE} = undef; # ZPE
297 1         2 $log->{GSOLV} = undef; # Delta G of Solvation
298 1         3 $log->{GTHERM} = undef; # Thermal corrections to G
299 1         3 $log->{HOMO} = undef;
300 1         2 $log->{HTHERM} = undef; # Thermal corrections to H
301 1         16 $log->{MOSYMM} = [];
302 1         3 $log->{SADDLEPOINT} = undef; # Saddle-point Order
303 1         3 $log->{OCC} =[]; # MO occupation info
304 1         2 $log->{PG} = [];
305 1         3 $log->{SSQUARED} = []; # S squared values
306              
307 1         3 bless($log, $class);
308 1         4 return $log;
309             }
310              
311              
312             ## methods ##
313              
314             =item B<$log-Eanalyze(filename [spin])>
315              
316             Analyze the spin results in file called filename. Spin defaults to Alpha.
317              
318             =cut
319              
320             # set filename & spin then digest the file
321             sub analyze : method {
322 0     0 1   my $log = shift;
323 0           $log->prepare(@_);
324 0           $log->_digest();
325 0           return;
326             }
327              
328              
329             ## subroutines ##
330             sub _digest {
331             # Files larger than 1Mb should be converted to binary and then
332             # processed to ensure maximum speed. -D. Ennis, OSC
333              
334             # For items with multiple occurances, the last value is reported
335              
336 0     0     my $log = shift;
337              
338             # flags & counters
339 0           my $Cflag = 0;
340 0           my $symmflag = 0;
341 0           my $rparsed = 0;
342 0           my $Sflag = 0;
343 0           my $Scount = 0;
344 0           my $Ccount = 0;
345 0           my $dcount = 0;
346 0           my $eigcount = 0;
347 0           my $Ecount = 0;
348 0           my $ESTATEcount = 0;
349 0           my $orbcount = -1;
350 0           my $MOcount = 0;
351 0           my $PGcount = 0;
352              
353             # open filename for reading or display error
354 0 0         open(LOGFILE,$log->{FILENAME}) || die "Could not read $log->{FILENAME}\n$!\n";
355              
356             # grab everything which may be useful
357 0           while (){
358             # skip blank lines
359 0 0         next if /^$/;
360              
361             # dashed line (signaling route, title, etc)
362 0 0         if ( /^\s-{4,}$/ ) {
363 0           ++$dcount;
364 0           next;
365             }
366             # parse route
367 0 0 0       if ( $rparsed == 0 && $dcount >= 4 ){
368 0           rparser($log);
369 0           $rparsed = 1;
370             }
371             # version info
372 0 0         if ( /^\s+Gaussian\s+([0-9DV]+):\s+([a-zA-Z0-9-]+)-G\1Rev([a-zA-Z]+\.[\d\+]+)\s+(\d{1,2}-[a-zA-Z]+-\d{4})\s*/ ) {
373 0           $log->{VERSION} = $1;
374 0           $log->{REVISON} = $3;
375 0           $log->{COMPILE} = $2;
376 0           $log->{COMPILEDATE} = $4;
377 0           next;
378             }
379             # run date
380 0 0         if ( /^\s+(\d{1,2}-[a-zA-Z]+-\d{4})\s*/ ) {
381 0           $log->{RUNTIME} = $1;
382 0           next;
383             }
384             # % commands
385 0 0         if ( /^\s+\%nproc(?:shared)*=(\d+)/ ) {
386 0           $log->{LINK0} [0] = $_;
387 0           next;
388             }
389 0 0         if ( /^\s+\%mem=(\d+)/ ) {
390 0           $log->{LINK0} [1] = $1;
391 0           next;
392             }
393 0 0         if ( /^\s+\%chk=(.+)/ ) {
394 0           $log->{LINK0} [2] = $1;
395 0           next;
396             }
397 0 0         if ( /^\s+\%subst\s+(L.+)/ ) {
398 0           $log->{LINK0} [3] = $1;
399 0           next;
400             }
401 0 0         if ( /^\s+\%(?:nproc)*linda(?:workers)*=(\d+)/ ) {
402 0           $log->{LINK0} [4] = $1;
403 0           next;
404             }
405 0 0         if ( /^\s+\%save/ ) {
406 0           $log->{LINK0} [5] = 1;
407 0           next;
408             }
409 0 0         if ( /^\s+\%nosave/ ) {
410 0           $log->{LINK0} [6] = 1;
411 0           next;
412             }
413 0 0         if ( /^\s+\%kjob\s+(.+)/ ) {
414 0           $log->{LINK0} [7] = $1;
415 0           next;
416             }
417 0 0         if ( /^\s+\%rwf=(.+)/ ) {
418 0           $log->{LINK0} [8] = $1;
419 0           next;
420             }
421 0 0         if ( /^\s+\%int=(.+)/ ) {
422 0           $log->{LINK0} [9] = $1;
423 0           next;
424             }
425 0 0         if ( /^\s+\%d2e=(.+)/ ) {
426 0           $log->{LINK0} [10] = $1;
427 0           next;
428             }
429             # route card
430 0 0 0       if ( /^\s(#\s*[\+\/a-zA-Z0-9\*=\(\-,\)\s]+)\Z/ ) {
    0          
431 0           $log->{ROUTE} = lc($1);
432 0           $log->{ROUTE}=~ s/\s+$//;
433 0           next;
434             }
435             elsif ( /^\s([\+\/a-zA-Z0-9=\(\-,\)\s]+)\Z/ && $dcount == 3 ) {
436 0           $log->{ROUTE} = $log->{ROUTE}.$1;
437 0           $log->{ROUTE}=~ s/\s+$//;
438 0           next;
439             }
440             # charge & multiplicity (multiple values and occurances)
441 0 0         if ( /^\s+Charge\s+=\s+(-*\d+)\s+Multiplicity\s+=\s+(\d+)\s*/ ) {
442 0           $log->{CHARGE} = $1;
443 0           $log->{MULTIPLICITY} = $2;
444 0           next;
445             }
446             # full point group (multiple occurances)
447 0 0         if ( /^\s+Full\s+point\s+group\s+([CDSIOT])([0-9DHISV\*]+)\s+.*/ ) {
448 0           $log->{PG} [$PGcount] = "$1($2)";
449 0           $PGcount++;
450 0           next;
451             }
452             # basis functions (multiple occurances)
453 0 0         if ( /^\s+(\d+)\s+basis functions,*\s+(\d+)\s+primitive gaussians,*\s*(\d+)*\s*(cartesian basis functions)*\s*/ ) {
454 0           $log->{NBASIS} = $1;
455 0           $log->{NPRIMITIVE} = $2;
456 0           $log->{NCARTESIAN} = $3;
457 0           next;
458             }
459             # electrons (multiple occurances)
460             # figure HOMO & LUMO, alphas fill first
461 0 0         if ( /^\s+(\d+)\s+alpha electrons\s+(\d+)\s+beta electrons/ ) {
462 0           $log->{ALPHA} = $1;
463 0           $log->{BETA} = $2;
464 0           $log->{HOMO} = $log->{uc($log->{SPIN})};
465 0           next;
466             }
467            
468             # orbital symmetries (multiple occurances)
469 0 0 0       if ( /^\s+Orbital (?i:S)ymmetries:\s*/ || /^\s+$log->{SPIN}\s+Orbitals:\s*/ ) {
470 0           $symmflag = 1;
471 0           $orbcount++;
472 0           next;
473             }
474 0 0 0       if ( $symmflag == 1 && /^\s+(\w*)\s+(?:\([123ABDEGHILMPSTU\?'"]{1,4}\)\s*){1,}$/ ) {
475 0           (my $junk, my $tmp) = split /^\s+\w*/;
476 0           $tmp =~ s/\s|\(//g;
477 0           $tmp =~ tr/A-Z/a-z/;
478 0           my @sym = split /\)/, $tmp;
479 0 0         $orbcount++ if $orbcount == -1; # temporary hack
480 0           for (my $i=0; $i
481 0           $log->{MOSYMM} [$orbcount] [$MOcount] = $sym[$i];
482 0           $MOcount++;
483 0 0 0       $MOcount = $symmflag = 0 if ($MOcount == $log->{NBASIS} -1 && $log->{VERSION} eq "98");
484 0 0         $MOcount = $symmflag = 0 if $MOcount == $log->{NBASIS};
485             }
486 0           next;
487             }
488             # SCF electronic energy (multiple occurances)
489 0 0         if ( /^\s+SCF\s+Done:\s+.*\s+=\s+(-*\d+\.\d+)/ ) {
490 0           $log->{ESCF} [$Ecount]= $1;
491 0           $log->{EELEC} = $1;
492 0           $log->{ENERGY} = $1;
493 0           $Ecount++;
494 0           next;
495             }
496             # vlaue (multiple occurances)
497             # generally not printed for closed shell theories
498 0 0         if ( /^\s+.*?<*S\*\*2>*\s*=\s+(\d+\.\d+)/ ) {
499 0           $log->{SSQUARED} [$Scount] = $1;
500 0           $Scount++;
501 0           next;
502             }
503             # Delta G of solvation (occurs each SCF cycle on SCRF jobs)
504 0 0         if ( /^\s+DeltaG\s+\(solv\)\s+\(kcal\/mol\)\s+=\s+(-*\d+\.\d+)/ ) {
505 0           $log->{GSOLV} = $1;
506 0           next;
507             }
508             # Optimization completion
509 0 0         if ( /^\s+Optimization\s+completed\./ ){
510 0           $log->{OPTIMIZED} = 1;
511 0           next;
512             }
513             # Electronic State (multiple occurances)
514 0 0         if ( /^\s+The\s+electronic\s+state\s+is\s+(\d+-[123ABDEGHILMPSTU\?'"]{1,3})/ ) {
515 0           $log->{ESTATE} [$ESTATEcount] = $1;
516 0           $ESTATEcount++;
517 0           next;
518             }
519             # eigenvalues
520 0 0         if ( /^\s+$log->{SPIN}\s+([ocvirt]+\.)\s+eigenvalues\s+--\s+(?:-*\d+\.\d+\s*){1,}$/ ) {
521 0           my $pop = $1;
522 0           (my $junk,my $tmp) = split /^\s+$log->{SPIN}\s+\w+\.\s+eigenvalues\s+--\s+/;
523 0           $tmp =~ s/(\d)-(\d)/$1 -$2/; # separate large negative eigenvalues
524 0           my @eig = split /\s+/, $tmp;
525 0 0         $orbcount++ if $orbcount == -1;
526 0           for (my $i=0; $i
527 0           $log->{EIGEN} [$orbcount] [$eigcount] = $eig[$i];
528 0           $log->{OCC} [$orbcount] [$eigcount] = $pop;
529 0           $eigcount++;
530 0 0 0       $eigcount = 0 if ($eigcount == $log->{NBASIS}-1 && $log->{VERSION} eq "98");
531 0 0         $eigcount = 0 if $eigcount == $log->{NBASIS};
532             }
533 0           next;
534             }
535             # MO coeffients (square matrix, multiple occurances)
536 0 0         if ( /\s+($log->{SPIN})*Molecular Orbital Coefficients/ ) {
537 0           $Cflag = 1;
538 0           $log->{C} = undef;
539 0           next;
540             }
541 0 0 0       if ( $Cflag == 1 && /\s*(\d+)\s(\d+)\s*(\w+)\s+(\d+[A-Z]+\s?\-?\+?[0-9]*)\s*(\-*\d+\.\d+)\s*(\-*\d+\.\d+)\s*(\-*\d+\.\d+)\s*(\-*\d+\.\d+)\s*(\-*\d+\.\d+)\s*/ ) {
    0 0        
542 0           $log->{BASISLABELS}[$Ccount] = [$1, $2, $3, $4];
543 0           push @{ $log->{C}[$Ccount] }, $5, $6, $7, $8, $9;
  0            
544 0           $Ccount++;
545 0 0         $Ccount = 0 if $Ccount == $log->{NBASIS};
546 0           next;
547             } elsif ( $Cflag == 1 && /\s*(\d+)\s*(\d+[A-Z]+\s?\-?\+?[0-9]*)\s*(\-*\d+\.\d+)\s*(\-*\d+\.\d+)\s*(\-*\d+\.\d+)\s*(\-*\d+\.\d+)\s*(\-*\d+\.\d+)\s*/ ) {
548 0           $log->{BASISLABELS}[$Ccount] = [$1, $log->{BASISLABELS}[$Ccount - 1] [1], $log->{BASISLABELS}[$Ccount - 1] [2], $2];
549 0           push @{ $log->{C}[$Ccount] }, $3, $4, $5, $6, $7;
  0            
550 0           $Ccount++;
551 0 0         $Ccount = 0 if $Ccount == $log->{NBASIS};
552 0           next;
553             }
554             # density matrix
555 0 0         if (/\s+DENSITY MATRIX./){
556 0           $Cflag = 0;
557 0           next;
558             }
559             # Saddle-point Order
560             # appears only in freq runs that are not minima
561 0 0         if ( /^\s+\*+\s+(\d+)\s+imaginary\sfrequencies\s\(negative\sSigns\)\s+\*+/ ){
562 0           $log->{SADDLEPOINT} = $1;
563 0           next;
564             }
565             # ZPE (frequency runs only)
566 0 0         if ( /^\s+Zero-point\s+correction=\s+(-*\d*.\d+)/ ){
567 0           $log->{EZPE} = $1;
568 0           $log->{ENERGY} = $log->get("ESCF") + $log->{EZPE};
569 0           $log->{EINFO} = "E(elec) + E(ZPE)";
570 0           next;
571             }
572             # Thermal corrections to E (freq runs only)
573 0 0         if ( /^\s+Thermal\scorrection\sto\sEnergy=\s+(\d\.\d+)/ ) {
574 0           $log->{ETHERM} = $1;
575 0           next;
576             }
577             # Thermal corrections to H (freq runs only)
578 0 0         if ( /^\s+Thermal\scorrection\sto\sEnthalpy=\s+(\d\.\d+)/ ) {
579 0           $log->{HTHERM} = $1;
580 0           next;
581             }
582             # Thermal corrections to G (freq runs only)
583 0 0         if ( /^\s+Thermal\scorrection\sto\sGibbs\sFree\sEnergy=\s+(\d\.\d+)/ ) {
584 0           $log->{GTHERM} = $1;
585 0           next;
586             }
587             # Successful Job completion
588 0 0         if ( /^\s+Normal\s+termination\s+of\s+$log->{PROGRAM}/ ){
589 0           $log->{COMPLETE} = 1;
590 0           next;
591             }
592             # Calculation time stored as days, hours, minutes, seconds.
593             # add code for tracking individual times.
594 0 0         if ( /\s+Job\s+cpu\s+time:\s+(\d+)\s+days\s+(\d+)\s+hours\s+(\d+)\s+minutes\s+(\d+\.\d)\s+seconds/ ) {
595 0           $log->{TIME} [0] = $1 + $log->{TIME} [0];
596 0           $log->{TIME} [1] = $2 + $log->{TIME} [1];
597 0           $log->{TIME} [2] = $3 + $log->{TIME} [2];
598 0           $log->{TIME} [3] = $4 + $log->{TIME} [3];
599 0           next;
600             }
601             }
602              
603             # Saddle-point Order for minima
604 0 0 0       if ( $log->get("JOBTYPE") =~ /FREQ/ && $log->{COMPLETE} == 1 ){
605 0 0         $log->{SADDLEPOINT} = 0 unless defined $log->{SADDLEPOINT};
606             }
607              
608             # C(1) symmetry label hack
609 0 0         if ( $log->{"PG"} eq "C(1)" ) {
610 0 0         if ( $log->{VERSION} eq "98" ) {
611 0           for (my $i=0; $i<$log->{NBASIS}-1; $i++) {
612 0           $log->{MOSYMM} [0] [$i] = "a";
613             }
614             } else {
615 0           for (my $i=0; $i<$log->{NBASIS}; $i++) {
616 0           $log->{MOSYMM} [0] [$i] = "a";
617             }
618             }
619             }
620             }
621              
622             1;
623             __END__