File Coverage

blib/lib/Chemistry/Mol.pm
Criterion Covered Total %
statement 240 334 71.8
branch 41 76 53.9
condition 2 7 28.5
subroutine 50 60 83.3
pod 37 45 82.2
total 370 522 70.8


line stmt bran cond sub pod time code
1             package Chemistry::Mol;
2             $VERSION = '0.37';
3             # $Id: Mol.pm,v 1.50 2009/05/10 19:37:58 itubert Exp $
4              
5             =head1 NAME
6              
7             Chemistry::Mol - Molecule object toolkit
8              
9             =head1 SYNOPSIS
10              
11             use Chemistry::Mol;
12              
13             $mol = Chemistry::Mol->new(id => "mol_id", name => "my molecule");
14             $c = $mol->new_atom(symbol => "C", coords => [0,0,0]);
15             $o = $mol->new_atom(symbol => "O", coords => [0,0,1.23]);
16             $mol->new_bond(atoms => [$c, $o], order => 3);
17              
18             print $mol->print;
19              
20             =head1 DESCRIPTION
21              
22             This package, along with Chemistry::Atom and Chemistry::Bond, includes basic
23             objects and methods to describe molecules.
24              
25             The core methods try not to enforce a particular convention. This means that
26             only a minimal set of attributes is provided by default, and some attributes
27             have very loosely defined meaning. This is because each program and file type
28             has different idea of what each concept (such as bond and atom type) means.
29             Bonds are defined as a list of atoms (typically two) with an arbitrary type.
30             Atoms are defined by a symbol and a Z, and may have 3D and internal coordinates
31             (2D coming soon).
32              
33             =cut
34              
35 17     17   208055 use 5.006;
  17         65  
  17         717  
36 17     17   119 use strict;
  17         30  
  17         586  
37 17     17   113 use warnings;
  17         35  
  17         553  
38 17     17   14771 use Chemistry::Atom;
  17         52  
  17         1226  
39 17     17   17087 use Chemistry::Bond;
  17         47  
  17         489  
40 17     17   293 use Carp;
  17         35  
  17         1493  
41 17     17   93 use base qw(Chemistry::Obj Exporter);
  17         30  
  17         4017  
42 17     17   47881 use Storable 'dclone';
  17         98601  
  17         47434  
43              
44             our @EXPORT_OK = qw(read_mol);
45             our @EXPORT = ();
46             our %EXPORT_TAGS = (
47             all => [@EXPORT, @EXPORT_OK],
48             );
49              
50              
51              
52             my %FILE_FORMATS = ();
53              
54             =head1 METHODS
55              
56             See also L for generic attributes.
57              
58             =over 4
59              
60             =item Chemistry::Mol->new(name => value, ...)
61              
62             Create a new Mol object with the specified attributes.
63              
64             $mol = Chemistry::Mol->new(id => 'm123', name => 'my mol')
65              
66             is the same as
67              
68             Chemistry::Mol->new()
69             $mol->id('m123')
70             $mol->name('my mol')
71              
72             =cut
73              
74             sub new {
75 26     26 1 1565 my $class = shift;
76 26         61 my %args = @_;
77 26   66     104 my $self = bless {
78             id => $class->nextID,
79             byId => {},
80             atoms => [],
81             bonds => [],
82             name => "",
83             }, ref $class || $class;
84 26         137 $self->$_($args{$_}) for (keys %args);
85 26         92 return $self;
86             }
87              
88             my $N = 0; # molecule ID counter
89 27     27 0 368 sub nextID { "mol".++$N; }
90 0     0 0 0 sub reset_id { $N = 0; }
91 0     0 0 0 sub next_id { $N = $_[1] }
92              
93             =item $mol->add_atom($atom, ...)
94              
95             Add one or more Atom objects to the molecule. Returns the last atom added.
96              
97             =cut
98              
99             sub add_atom {
100 552     552 1 1747 my $self = shift;
101 552         848 for my $atom (@_){
102             #if ($self->by_id($atom->id)) {
103             #croak "Duplicate ID when adding atom '$atom' to mol '$self'";
104             #}
105 553         757 push @{$self->{atoms}}, $atom;
  553         2199  
106 553         2728 $self->{byId}{$atom->id} = $atom;
107 553         2628 $atom->parent($self);
108             }
109 552         2588 $_[-1];
110             }
111              
112             sub add_atom_np {
113 0     0 0 0 my $self = shift;
114 0         0 for my $atom (@_){
115 0         0 push @{$self->{atoms}}, $atom;
  0         0  
116 0         0 $self->{byId}{$atom->id} = $atom;
117             }
118 0         0 $_[-1];
119             }
120              
121             =item $mol->atom_class
122              
123             Returns the atom class that a molecule or molecule class expects to use by
124             default. L objects return "Chemistry::Atom", but subclasses
125             will likely override this method.
126              
127             =cut
128              
129             sub atom_class {
130 14     14 1 63 "Chemistry::Atom";
131             }
132              
133             =item $mol->new_atom(name => value, ...)
134              
135             Shorthand for C<< $mol->add_atom($mol->atom_class->new(name => value, ...)) >>.
136              
137             =cut
138              
139             sub new_atom {
140 14     14 1 80 my $self = shift;
141 14         36 $self->add_atom($self->atom_class->new(@_));
142             }
143              
144             =item $mol->delete_atom($atom, ...)
145              
146             Deletes an atom from the molecule. It automatically deletes all the bonds in
147             which the atom participates as well. $atom should be a Chemistry::Atom
148             reference. This method also accepts the atom index, but this use is deprecated
149             (and buggy if multiple indices are given, unless they are in descending order).
150              
151             =cut
152              
153             sub delete_atom {
154 2     2 1 5 my $self = shift;
155 2         6 for my $i (@_) {
156 2         4 my ($atom);
157 2 100       7 if (ref $i) {
158 1         3 $atom = $i;
159             } else {
160 1 50       4 $atom = $self->atoms($i)
161             or croak "$self->delete_atom: no such atom $i\n";
162             }
163 2         10 $atom->delete($i);
164             }
165             }
166              
167             # takes an atom ref to delete and optionally the atom index
168             # 1) deletes bonds that belonged to atom
169             # 2) deletes atom
170             sub _delete_atom {
171 7     7   13 my ($self, $atom) = @_;
172 7 50       32 my $index = $self->get_atom_index($atom)
173             or croak "$self->delete_atom: no such atom $atom\n";
174 7         39 my $id = $atom->id;
175 7         31 $self->delete_bond($atom->bonds);
176 7         56 delete $self->{byId}{$id};
177 7         25 splice @{$self->{atoms}}, $index - 1, 1;
  7         41  
178             }
179              
180             =item $mol->add_bond($bond, ...)
181              
182             Add one or more Bond objects to the molecule. Returns the last bond added.
183              
184             =cut
185              
186             sub add_bond {
187 25     25 1 45 my $self = shift;
188 25         43 for my $bond (@_){
189             #if ($self->by_id($bond->id)) {
190             #croak "Duplicate ID when adding bond '$bond' to mol '$self'";
191             #}
192 25         34 push @{$self->{bonds}}, $bond;
  25         55  
193 25         107 $self->{byId}{$bond->id} = $bond;
194 25 100       106 if ($bond->{deleted}) {
195 1         4 $_->add_bond($bond) for $bond->atoms;
196 1         3 $bond->{deleted} = 0;
197             }
198 25         88 $bond->parent($self);
199             }
200 25         71 $_[-1];
201             }
202              
203             sub add_bond_np {
204 0     0 0 0 my $self = shift;
205 0         0 for my $bond (@_){
206 0         0 push @{$self->{bonds}}, $bond;
  0         0  
207 0         0 $self->{byId}{$bond->id} = $bond;
208             }
209 0         0 $_[-1];
210             }
211              
212             =item $mol->bond_class
213              
214             Returns the bond class that a molecule or molecule class expects to use by
215             default. L objects return "Chemistry::Bond", but subclasses
216             will likely override this method.
217              
218             =cut
219              
220             sub bond_class {
221 8     8 1 122 "Chemistry::Bond";
222             }
223              
224             =item $mol->new_bond(name => value, ...)
225              
226             Shorthand for C<< $mol->add_bond($mol->bond_class->new(name => value, ...)) >>.
227              
228             =cut
229              
230             sub new_bond {
231 8     8 1 22 my $self = shift;
232 8         29 $self->add_bond($self->bond_class->new(@_));
233             }
234              
235             sub get_bond_index {
236 14     14 0 23 my ($self, $bond) = @_;
237 14         43 my $i;
238 14         37 for ($self->bonds) {
239 42         59 ++$i;
240 42 100       99 return $i if ($_ eq $bond);
241             }
242 0         0 undef;
243             }
244              
245             sub get_atom_index {
246 7     7 0 14 my ($self, $atom) = @_;
247 7         12 my $i;
248 7         21 for ($self->atoms) {
249 12         20 ++$i;
250 12 100       41 return $i if ($_ eq $atom);
251             }
252 0         0 undef;
253             }
254              
255             =item $mol->delete_bond($bond, ...)
256              
257             Deletes a bond from the molecule. $bond should be a L object.
258              
259             =cut
260              
261             # mol deletes bond
262             # bond tells atoms involved to forget about it
263              
264             sub delete_bond {
265 7     7 1 18 my $self = shift;
266 7         19 for my $i (@_){
267 11         17 my ($bond);
268 11 50       31 if (ref $i) {
269 11         15 $bond = $i;
270             } else {
271 0 0       0 $bond = $self->bonds($i)
272             or croak "$self->delete_bond($i): no such bond $i\n";
273             }
274 11         125 $bond->delete;
275             }
276             }
277              
278             sub _delete_bond {
279 14     14   23 my ($self, $bond) = @_;
280 14 50       44 my $index = $self->get_bond_index($bond)
281             #or croak "$self->delete_bond: no such bond $bond\n";
282             or return;
283 14         63 my $id = $bond->id;
284 14         39 delete $self->{byId}{$id};
285 14         24 splice @{$self->{bonds}}, $index - 1, 1;
  14         39  
286 14         64 $bond->delete_atoms;
287             }
288              
289             =item $mol->by_id($id)
290              
291             Return the atom or bond object with the corresponding id.
292              
293             =cut
294              
295             sub by_id {
296 3     3 1 7 my $self = shift;
297 3         6 my ($id) = @_;
298 3         20 $self->{byId}{$id};
299             }
300              
301             sub _change_id {
302 4     4   118 my ($self, $old_id, $new_id) = @_;
303 4         11 my $ref = $self->{byId}{$old_id};
304 4         13 $self->{byId}{$new_id} = $ref;
305 4         18 delete $self->{byId}{$old_id};
306             }
307              
308             =item $mol->atoms($n1, ...)
309              
310             Returns the atoms with the given indices, or all by default.
311             Indices start from one, not from zero.
312              
313             =cut
314              
315             sub atoms {
316 90     90 1 4773 my $self = shift;
317 90 100       266 if (@_) {
318 18         41 my @ats = map {$_ - 1} @_;
  24         76  
319 18         34 @{$self->{atoms}}[@ats];
  18         121  
320             } else {
321 72         92 @{$self->{atoms}};
  72         453  
322             }
323             }
324              
325             =item $mol->atoms_by_name($name)
326              
327             Returns the atoms with the given name (treated as an anchored regular
328             expression).
329              
330             =cut
331              
332             sub atoms_by_name {
333 1     1 1 3 my $self = shift;
334 1         20 my $re = qr/^$_[0]$/;
335 17     17   226 no warnings;
  17         43  
  17         10043  
336 1         5 my @ret = grep {$_->name =~ $re} $self->atoms;
  2         8  
337 1 50       10 wantarray ? @ret : $ret[0];
338             }
339              
340             =item $mol->sort_atoms($sub_ref)
341              
342             Sort the atoms in the molecule by using the comparison function given in
343             $sub_ref. This function should take two atoms as parameters and return -1, 0,
344             or 1 depending on whether the first atom should go before, same, or after the
345             second atom. For example, to sort by atomic number, you could use the
346             following:
347              
348             $mol->sort_atoms( sub { $_[0]->Z <=> $_[1]->Z } );
349              
350             Note that the atoms are passed as parameters and not as the package variables
351             $a and $b like the core sort function does. This is because $mol->sort will
352             likely be called from another package and we don't want to play with another
353             package's symbol table.
354              
355             =cut
356              
357             sub sort_atoms {
358 0     0 1 0 my ($self, $sub) = @_;
359 0         0 my @a = $self->atoms;
360 0         0 @a = sort { $sub->($a,$b) } @a;
  0         0  
361 0         0 $self->{atoms} = \@a;
362 0         0 $self;
363             }
364              
365             =item $mol->bonds($n1, ...)
366              
367             Returns the bonds with the given indices, or all by default.
368             Indices start from one, not from zero.
369              
370             =cut
371              
372             sub bonds {
373 49     49 1 531 my $self = shift;
374 49 100       507 if (@_) {
375 6         16 my @bonds = map {$_ - 1} @_;
  6         24  
376 6         14 @{$self->{bonds}}[@bonds];
  6         44  
377             } else {
378 43         60 @{$self->{bonds}};
  43         181  
379             }
380             }
381              
382             =item $mol->print(option => value...)
383              
384             Convert the molecule to a string representation. If no options are given,
385             a default YAML-like format is used (this may change in the future). Otherwise,
386             the format should be specified by using the C option.
387              
388             =cut
389              
390             sub print {
391 19     19 1 145 my $self = shift;
392 19         58 my (%opts) = @_;
393 19         26 my $ret;
394 19         31 local $" = ""; #"
395              
396 19 50       126 if ($opts{format}) {
397 19         57 return $self->formats($opts{format})->write_string($self, %opts);
398             }
399             # else use default printout
400 0         0 $ret = <
401             $self->{id}:
402             name: $self->{name}
403             END
404 0         0 $ret .= " attr:\n";
405 0         0 $ret .= $self->print_attr(2);
406 0         0 $ret .= " atoms:\n";
407 0         0 for my $a (@{$self->{atoms}}) { $ret .= $a->print(2) }
  0         0  
  0         0  
408 0         0 $ret .= " bonds:\n";
409 0         0 for my $b (@{$self->{bonds}}) { $ret .= $b->print(2) }
  0         0  
  0         0  
410 0         0 $ret;
411             }
412              
413             =item $s = $mol->sprintf($format)
414              
415             Format interesting molecular information in a concise way, as specified by
416             a printf-like format.
417              
418             %n - name
419             %f - formula
420             %f{formula with format} - (note: right braces within
421             the format should be escaped with a backslash)
422             %s - SMILES representation
423             %S - canonical SMILES representation
424             %m - mass
425             %8.3m - mass, formatted as %8.3f with core sprintf
426             %q - formal charge
427             %a - atom count
428             %b - bond count
429             %t - type
430             %i - id
431             %% - %
432              
433             For example, if you want just about everything:
434              
435             $mol->sprintf("%s - %n (%f). %a atoms, %b bonds; "
436             . "mass=%m; charge =%q; type=%t; id=%i");
437              
438             Note that you have to C before using C<%s> or
439             C<%S> on C<< $mol->sprintf >>.
440              
441             =cut
442              
443             sub sprintf {
444 0     0 1 0 my ($mol, $format) = @_;
445 17     17   125 no warnings 'uninitialized'; # don't care if some properties are undefined
  17         37  
  17         63099  
446 0   0     0 $format ||= "%f";
447 0         0 $format =~ s/%%/\\%/g; # escape %% with a \
448 0         0 $format =~ s/(?formula($1)/eg; # %f{}
  0         0  
449 0         0 $format =~ s/(?formula/eg; # %f
  0         0  
450 0         0 $format =~ s/(?print(format=>'smiles')/eg; # %s
  0         0  
451 0         0 $format =~ s/(?print(format=>'smiles', unique => 1)/eg; # %s
  0         0  
452 0         0 $format =~ s/(?name/eg; # %n
  0         0  
453 0         0 $format =~ s/(?
454 0 0       0 $1 ? sprintf "%$1f", $mol->mass : $mol->mass/eg; # %m
455 0         0 $format =~ s/(?charge/eg; # %q
  0         0  
456 0         0 $format =~ s/(?atoms/eg; # %a
  0         0  
457 0         0 $format =~ s/(?bonds/eg; # %b
  0         0  
458 0         0 $format =~ s/(?type/eg; # %t
  0         0  
459 0         0 $format =~ s/(?id/eg; # %i
  0         0  
460 0         0 $format =~ s/\\(.)/$1/g; # other \ escapes
461 0         0 $format;
462             }
463              
464             =item $mol->printf($format)
465              
466             Same as C<< $mol->sprintf >>, but prints to standard output automatically.
467             Used for quick and dirty molecular information dumping.
468              
469             =cut
470              
471             sub printf {
472 0     0 1 0 my ($mol, $format) = @_;
473 0         0 print $mol->sprintf($format);
474             }
475              
476             =item Chemistry::Mol->parse($string, option => value...)
477              
478             Parse the molecule encoded in C<$string>. The format should be specified
479             with the the C option; otherwise, it will be guessed.
480              
481             =cut
482              
483             sub parse {
484 14     14 1 13839 my $self = shift;
485 14         26 my $s = shift;
486 14         62 my %opts = (mol_class => $self, @_);
487              
488 14 50       41 if ($opts{format}) {
489 14         54 return $self->formats($opts{format})->parse_string($s, %opts);
490             } else {
491 0         0 croak "Parse does not support autodetection yet.",
492             "Please specify a format.";
493             }
494 0         0 return;
495             }
496              
497             =item Chemistry::Mol->read($fname, option => value ...)
498              
499             Read a file and return a list of Mol objects, or croaks if there was a problem.
500             The type of file will be guessed if not specified via the C option.
501              
502             Note that only registered file readers will be used. Readers may be registered
503             using C; modules that include readers (such as
504             L) usually register them automatically when they are
505             loaded.
506              
507             Automatic decompression of gzipped files is supported if the L
508             module is installed. Files ending in .gz are assumed to be compressed;
509             otherwise it is possible to force decompression by passing the gzip => 1
510             option (or no decompression with gzip => 0).
511              
512             =cut
513              
514             sub read_mol { # for backwards compatibility
515 0     0 0 0 my ($fname, $type) = shift;
516 0         0 __PACKAGE__->read($fname, format => $type);
517             }
518              
519             sub read {
520 11     11 1 90902 my $self = shift;
521 11         27 my $fname = shift;
522 11         57 my %opts = (mol_class => $self, @_);
523              
524 11 100       50 if ($opts{format}) {
525 3         18 return $self->formats($opts{format})->parse_file($fname, %opts);
526             } else { # guess format
527 8         41 for my $type ($self->formats) {
528 8 50       30 if ($self->formats($type)->file_is($fname)) {
529 8         31 return $self->formats($type)->parse_file($fname, %opts);
530             }
531             }
532             }
533 0         0 croak "Couldn't guess format of file '$fname'";
534             }
535              
536             =item $mol->write($fname, option => value ...)
537              
538             Write a molecule file, or croak if there was a problem. The type of file will
539             be guessed if not specified via the C option.
540              
541             Note that only registered file formats will be used.
542              
543             Automatic gzip compression is supported if the IO::Zlib module is installed.
544             Files ending in .gz are assumed to be compressed; otherwise it is possible to
545             force compression by passing the gzip => 1 option (or no compression with gzip
546             => 0). Specific compression levels between 2 (fastest) and 9 (most compressed)
547             may also be used (e.g., gzip => 9).
548              
549             =cut
550              
551             sub write {
552 3     3 1 840 my ($self, $fname, %opts) = (@_);
553              
554 3 100       15 if ($opts{format}) {
555 2         12 return $self->formats($opts{format})->write_file(@_);
556             } else { # guess format
557 1         5 for my $type ($self->formats) {
558 1 50       14 if ($self->formats($type)->name_is($fname)) {
559 1         6 return $self->formats($type)->write_file(@_);
560             }
561             }
562             }
563 0         0 croak "Couldn't guess format for writing file '$fname'";
564             }
565              
566             =item Chemistry::Mol->file($file, option => value ...)
567              
568             Create a L-derived object for reading or writing to a file.
569             The object can then be used to read the molecules or other information in the
570             file.
571              
572             This has more flexibility than calling C<< Chemistry::Mol->read >> when
573             dealing with multi-molecule files or files that have higher structure or that
574             have information that does not belong to the molecules themselves. For
575             example, a reaction file may have a list of molecules, but also general
576             information like the reaction name, yield, etc. as well as the classification
577             of the molecules as reactants or products. The exact information that is
578             available will depend on the file reader class that is being used. The
579             following is a hypothetical example for reading MDL rxnfiles.
580              
581             # assuming this module existed...
582             use Chemistry::File::Rxn;
583              
584             my $rxn = Chemistry::Mol->file('test.rxn');
585             $rxn->read;
586             $name = $rxn->name;
587             @reactants = $rxn->reactants; # mol objects
588             @products = $rxn->products;
589             $yield = $rxn->yield; # a number
590              
591             Note that only registered file readers will be used. Readers may be registered
592             using register_format(); modules that include readers (such as
593             Chemistry::File::PDB) usually register them automatically.
594              
595             =cut
596              
597             sub file {
598 1     1 1 4 my ($self, $file, %opts) = @_;
599 1         6 %opts = (mol_class => $self, %opts);
600              
601 1 50       9 if ($opts{format}) {
602 0         0 return $self->formats($opts{format})->new(file => $file,
603             opts => \%opts);
604             } else { # guess format
605 1         5 for my $type ($self->formats) {
606 1 50       5 if ($self->formats($type)->file_is($file)) {
607 1         4 return $self->formats($type)->new(file => $file,
608             opts => \%opts);
609             }
610             }
611             }
612 0         0 croak "Couldn't guess format of file '$file'";
613             }
614              
615             =item Chemistry::Mol->register_format($name, $ref)
616              
617             Register a file type. The identifier $name must be unique. $ref is either a
618             class name (a package) or an object that complies with the L
619             interface (e.g., a subclass of Chemistry::File). If $ref is omitted, the
620             calling package is used automatically. More than one format can be registered
621             at a time, but then $ref must be included for each format (e.g.,
622             Chemistry::Mol->register_format(format1 => "package1", format2 => package2).
623              
624             The typical user doesn't have to care about this function. It is used
625             automatically by molecule file I/O modules.
626              
627             =cut
628              
629             sub register_format {
630 15     15 1 21529 my $class = shift;
631 15 100       112 if (@_ == 1) {
632 4         20 $FILE_FORMATS{$_[0]} = caller;
633 4         18 return;
634             }
635 11         51 my %opts = @_;
636 11         105 $FILE_FORMATS{$_} = $opts{$_} for keys %opts;
637             }
638              
639             =item Chemistry::Mol->formats
640              
641             Returns a list of the file formats that have been installed by
642             register_format()
643              
644             =cut
645              
646             sub formats {
647 68     68 1 123 my $self = shift;
648 68 100       172 if (@_) {
649 58         87 my ($type) = @_;
650 58         117 my $file_class = $FILE_FORMATS{$type};
651 58 50       179 unless ($file_class) {
652 0         0 croak "No class installed for type '$type'";
653             }
654 58         554 return $file_class;
655             } else {
656 10         68 return sort keys %FILE_FORMATS;
657             }
658             }
659              
660             =item $mol->mass
661              
662             Return the molar mass. This is just the sum of the masses of the atoms. See
663             L::mass for details such as the handling of isotopes.
664              
665             =cut
666              
667             sub mass {
668 2     2 1 8 my ($self) = @_;
669 2         4 my $mass = 0;
670 2         6 for my $atom ($self->atoms) {
671 6         17 $mass += $atom->mass;
672             }
673 2         14 $mass;
674             }
675              
676             =item $mol->charge
677              
678             Return the charge of the molecule. By default it returns the sum of the formal
679             charges of the atoms. However, it is possible to set an arbitrary charge by
680             calling C<< $mol->charge($new_charge) >>
681              
682             =cut
683              
684             sub charge {
685 0     0 1 0 my ($self) = shift;
686 0 0       0 if (@_) {
687 0         0 $self->{charge} = shift;
688 0         0 $self;
689             } else {
690 0 0       0 return $self->{charge} if defined $self->{charge};
691 0         0 my $charge = 0;
692 0   0     0 $charge += $_->formal_charge || 0 for $self->atoms;
693 0         0 $charge;
694             }
695             }
696              
697             =item $mol->formula_hash
698              
699             Returns a hash reference describing the molecular formula. For methane it would
700             return { C => 1, H => 4 }.
701              
702             =cut
703              
704             sub formula_hash {
705 17     17 1 73 my ($self) = @_;
706 17         26 my $formula = {};
707 17         56 for my $atom ($self->atoms) {
708 538         1444 $formula->{$atom->symbol}++;
709 538 50       1601 $formula->{H} += $atom->hydrogens if $atom->hydrogens;
710             }
711 17         214 $formula;
712             }
713              
714             =item $mol->formula($format)
715              
716             Returns a string with the formula. The format can be specified as a printf-like
717             string with the control sequences specified in the L
718             documentation.
719              
720             =cut
721              
722             sub formula {
723 5     5 1 1547 my ($self, $format) = @_;
724 5         1072 require Chemistry::File::Formula;
725 5         22 $self->print(format => "formula", formula_format => $format);
726             }
727              
728             =item my $mol2 = $mol->clone;
729              
730             Makes a copy of a molecule. Note that this is a B copy; if your molecule
731             has a pointer to the rest of the universe, the entire universe will be cloned!
732              
733             =cut
734              
735             sub clone {
736 8     8 1 15 my ($self) = @_;
737 8         2328 my $clone = dclone $self;
738 8 50       155 $clone->_weaken if Storable->VERSION < 2.14;
739 8         33 $clone;
740             }
741              
742             =item my $mol2 = $mol->safe_clone;
743              
744             Like clone, it makes a deep copy of a molecule. The difference is that the copy
745             is not "exact" in that new molecule and its atoms and bonds get assigned new
746             IDs. This makes it safe to combine cloned molecules. For example, this is an
747             error:
748              
749             # XXX don't try this at home!
750             my $mol2 = Chemistry::Mol->combine($mol1, $mol1);
751             # the atoms in $mol1 will clash
752              
753             But this is ok:
754              
755             # the "safe clone" of $mol1 will have new IDs
756             my $mol2 = Chemistry::Mol->combine($mol1, $mol1->safe_clone);
757              
758             =cut
759              
760             sub safe_clone {
761 1     1 1 2 my ($mol) = @_;
762 1         3 my $clone = $mol->clone;
763 1         4 for ($clone, $clone->atoms, $clone->bonds) {
764 4         17 $_->id($_->nextID);
765             }
766 1         3 $clone;
767             }
768              
769             sub _weaken {
770 14     14   36 my ($self) = @_;
771 14         70 for ($self->atoms, $self->bonds) {
772 196         581 $_->_weaken;
773             }
774 14         51 $self;
775             }
776              
777             =item ($distance, $atom_here, $atom_there) = $mol->distance($obj)
778              
779             Returns the minimum distance to $obj, which can be an atom, a molecule, or a
780             vector. In scalar context it returns only the distance; in list context it
781             also returns the atoms involved. The current implementation for calculating
782             the minimum distance between two molecules compares every possible pair of
783             atoms, so it's not efficient for large molecules.
784              
785             =cut
786              
787             sub distance {
788 0     0 1 0 my ($self, $other) = @_;
789 0 0       0 if ($other->isa("Chemistry::Mol")) {
    0          
    0          
790 0         0 my @atoms = $self->atoms;
791 0 0       0 my $atom = shift @atoms or return; # need at least one atom
792 0         0 my $closest_here = $atom;
793 0         0 my ($min_length, $closest_there) = $atom->distance($other);
794 0         0 for $atom (@atoms) {
795 0         0 my ($d, $o) = $atom->distance($other);
796 0 0       0 if ($d < $min_length) {
797 0         0 ($min_length, $closest_there, $closest_here) = ($d, $o, $atom);
798             }
799             }
800             return wantarray ?
801 0 0       0 ($min_length, $closest_here, $closest_there) : $min_length;
802             } elsif ($other->isa("Chemistry::Atom")) {
803 0         0 return $other->distance($self);
804             } elsif ($other->isa("Math::VectorReal")) {
805 0         0 return Chemistry::Atom->new(coords => $other)->distance($self);
806             }
807             }
808              
809             =item my $bigmol = Chemistry::Mol->combine($mol1, $mol2, ...)
810              
811             =item $mol1->combine($mol2, $mol3, ...)
812              
813             Combines several molecules in one bigger molecule. If called as a class method,
814             as in the first example, it returns a new combined molecule without altering
815             any of the parameters. If called as an instance method, as in the second
816             example, all molecules are combined into $mol1 (but $mol2, $mol3, ...) are not
817             altered. B: Make sure you don't combine molecules which contain atoms
818             with duplicate IDs (for example, if they were cloned).
819              
820             =cut
821              
822             # joins several molecules into one
823             sub combine {
824 2     2 1 455 my ($self, @others) = @_;
825 2         4 my $mol;
826 2 100       6 if (ref $self) {
827 1         3 $mol = $self;
828             } else {
829 1         5 $mol = $self->new;
830             }
831 2         5 for my $other (@others) {
832 3         9 my $mol2 = $other->clone;
833 3         8 for my $atom ($mol2->atoms) {
834 12         59 $mol->add_atom($atom);
835             }
836 3         10 for my $bond ($mol2->bonds) {
837 9         24 $mol->add_bond($bond);
838             }
839             }
840 2         7 $mol;
841             }
842              
843             =item my @mols = $mol->separate
844              
845             Separates a molecule into "connected fragments". The original object is not
846             modified; the fragments are clones of the original ones. Example: if you have
847             ethane (H3CCH3) and you delete the C-C bond, you have two CH3 radicals within
848             one molecule object ($mol). When you call $mol->separate you get two molecules,
849             each one with a CH3.
850              
851             =cut
852              
853             # splits a molecule into connected fragments
854             # returns a list of molecules. Does not touch the original copy.
855             sub separate {
856 1     1 1 946 my ($self) = @_;
857 1         5 $self = $self->clone;
858 1         4 $self->{_paint_tab} = {};
859 1         3 my $color = 0;
860 1         6 for my $atom ($self->atoms) {
861 8 100       28 next if defined $self->{_paint_tab}{$atom->id};
862 2         10 $self->_paint($atom, $color++);
863             }
864 1         3 my @mols;
865 1         6 push @mols, $self->new for (1 .. $color);
866 1         4 for my $atom ($self->atoms) {
867 8         25 $mols[$self->{_paint_tab}{$atom->id}]->add_atom($atom);
868             }
869 1         5 for my $bond ($self->bonds) {
870 6         18 $mols[$self->{_paint_tab}{$bond->id}]->add_bond($bond);
871             }
872 1         19 @mols;
873             }
874              
875             # this method fills the _paint_tab attribute for every atom connected
876             # to the given start atom $atom with $color. Used for separating
877             # connected fragments. Uses a depth-first search
878             sub _paint {
879 14     14   22 my ($self, $atom, $color) = @_;
880 14 100       34 return if defined $self->{_paint_tab}{$atom->id};
881 8         22 $self->{_paint_tab}{$atom->id} = $color;
882 8         21 $self->{_paint_tab}{$_->id} = $color for ($atom->bonds);
883 8         23 for my $neighbor ($atom->neighbors) {
884 12         25 $self->_paint($neighbor, $color);
885             }
886             }
887              
888             =item $mol->sprout_hydrogens
889              
890             Convert all the implicit hydrogen atoms in the molecule to explicit atoms.
891             It does B generate coordinates for the atoms.
892              
893             =cut
894              
895             sub sprout_hydrogens {
896 1     1 1 2 my ($self) = @_;
897 1         4 $_->sprout_hydrogens for $self->atoms;
898             }
899              
900             =item $mol->collapse_hydrogens
901              
902             Convert all the explicit hydrogen atoms in the molecule to implicit hydrogens.
903             (Exception: hydrogen atoms that are adjacent to a hydrogen atom are not
904             collapsed.)
905              
906             =cut
907              
908             sub collapse_hydrogens {
909 1     1 1 3 my ($self) = @_;
910 1         5 for my $atom (grep { $_->symbol ne 'H' } $self->atoms) {
  3         12  
911 1         7 $atom->collapse_hydrogens;
912             }
913             }
914              
915             =item $mol->add_implicit_hydrogens
916              
917             Use heuristics to figure out how many implicit hydrogens should each atom in
918             the molecule have to satisfy its normal "organic" valence.
919              
920             =cut
921              
922             sub add_implicit_hydrogens {
923 1     1 1 7 my ($self) = @_;
924 1         5 $_->add_implicit_hydrogens for $self->atoms;
925             }
926              
927              
928             my %DESCRIPTORS = ();
929              
930             =item Chemistry::Mol->register_descriptor($name => $sub_ref)
931              
932             Adds a callback that can be used to add functionality to the molecule class
933             (originally meant to add custom molecule descriptors.) A descriptor is a
934             function that takes a molecule object as its only argument and returns a value
935             or values. For example, to add a descriptor function that computes the number
936             of atoms:
937              
938             Chemistry::Mol->register_descriptor(
939             number_of_atoms => sub {
940             my $mol = shift;
941             return scalar $mol->atoms;
942             }
943             );
944              
945             The descriptor is accessed by name via the C instance method:
946              
947             my $n = $mol->descriptor('number_of_atoms');
948              
949             =cut
950              
951             sub register_descriptor {
952 1     1 1 74 my ($self, %opts) = @_;
953 1         8 $DESCRIPTORS{$_} = $opts{$_} for keys %opts;
954             }
955              
956             =item my $value = $mol->descriptor($descriptor_name)
957              
958             Calls a previously registered descriptor function giving it $mol as an
959             argument, as shown above for C.
960              
961             =cut
962              
963             sub descriptor {
964 1     1 1 3 my ($self, $descriptor) = @_;
965 1 50       6 my $sub = $DESCRIPTORS{$descriptor}
966             or croak "unknown descriptor '$descriptor'";
967 1         5 return $sub->($self);
968             }
969              
970             1;
971              
972             =back
973              
974             =head1 VERSION
975              
976             0.37
977              
978             =head1 SEE ALSO
979              
980             L, L, L,
981             L
982              
983             The PerlMol website L
984              
985             =head1 AUTHOR
986              
987             Ivan Tubert-Brohman Eitub@cpan.orgE
988              
989             =head1 COPYRIGHT
990              
991             Copyright (c) 2005 Ivan Tubert-Brohman. All rights reserved. This program is
992             free software; you can redistribute it and/or modify it under the same terms as
993             Perl itself.
994              
995             =cut
996