File Coverage

blib/lib/Chemistry/Mol.pm
Criterion Covered Total %
statement 243 338 71.8
branch 44 80 55.0
condition 2 7 28.5
subroutine 50 60 83.3
pod 37 45 82.2
total 376 530 70.9


line stmt bran cond sub pod time code
1             package Chemistry::Mol;
2              
3             our $VERSION = '0.39'; # VERSION
4             # $Id$
5              
6             =head1 NAME
7              
8             Chemistry::Mol - Molecule object toolkit
9              
10             =head1 SYNOPSIS
11              
12             use Chemistry::Mol;
13              
14             $mol = Chemistry::Mol->new(id => "mol_id", name => "my molecule");
15             $c = $mol->new_atom(symbol => "C", coords => [0,0,0]);
16             $o = $mol->new_atom(symbol => "O", coords => [0,0,1.23]);
17             $mol->new_bond(atoms => [$c, $o], order => 3);
18              
19             print $mol->print;
20              
21             =head1 DESCRIPTION
22              
23             This package, along with Chemistry::Atom and Chemistry::Bond, includes basic
24             objects and methods to describe molecules.
25              
26             The core methods try not to enforce a particular convention. This means that
27             only a minimal set of attributes is provided by default, and some attributes
28             have very loosely defined meaning. This is because each program and file type
29             has different idea of what each concept (such as bond and atom type) means.
30             Bonds are defined as a list of atoms (typically two) with an arbitrary type.
31             Atoms are defined by a symbol and a Z, and may have 3D and internal coordinates
32             (2D coming soon).
33              
34             =cut
35              
36 16     16   300916 use 5.006;
  16         87  
37 16     16   75 use strict;
  16         24  
  16         322  
38 16     16   69 use warnings;
  16         174  
  16         497  
39 16     16   6491 use Chemistry::Atom;
  16         51  
  16         931  
40 16     16   7547 use Chemistry::Bond;
  16         37  
  16         384  
41 16     16   90 use Carp;
  16         27  
  16         846  
42 16     16   79 use base qw(Chemistry::Obj Exporter);
  16         33  
  16         1624  
43 16     16   9169 use Storable 'dclone';
  16         45497  
  16         19500  
44              
45             our @EXPORT_OK = qw(read_mol);
46             our @EXPORT = ();
47             our %EXPORT_TAGS = (
48             all => [@EXPORT, @EXPORT_OK],
49             );
50              
51             our $clone_backend = 'Storable';
52              
53             my %FILE_FORMATS = ();
54              
55             =head1 METHODS
56              
57             See also L for generic attributes.
58              
59             =over 4
60              
61             =item Chemistry::Mol->new(name => value, ...)
62              
63             Create a new Mol object with the specified attributes.
64              
65             $mol = Chemistry::Mol->new(id => 'm123', name => 'my mol')
66              
67             is the same as
68              
69             Chemistry::Mol->new()
70             $mol->id('m123')
71             $mol->name('my mol')
72              
73             =cut
74              
75             sub new {
76 26     26 1 1689 my $class = shift;
77 26         48 my %args = @_;
78 26   66     61 my $self = bless {
79             id => $class->nextID,
80             byId => {},
81             atoms => [],
82             bonds => [],
83             name => "",
84             }, ref $class || $class;
85 26         78 $self->$_($args{$_}) for (keys %args);
86 26         78 return $self;
87             }
88              
89             my $N = 0; # molecule ID counter
90 27     27 0 216 sub nextID { "mol".++$N; }
91 0     0 0 0 sub reset_id { $N = 0; }
92 0     0 0 0 sub next_id { $N = $_[1] }
93              
94             =item $mol->add_atom($atom, ...)
95              
96             Add one or more Atom objects to the molecule. Returns the last atom added.
97              
98             =cut
99              
100             sub add_atom {
101 552     552 1 1419 my $self = shift;
102 552         681 for my $atom (@_){
103             #if ($self->by_id($atom->id)) {
104             #croak "Duplicate ID when adding atom '$atom' to mol '$self'";
105             #}
106 553         557 push @{$self->{atoms}}, $atom;
  553         1028  
107 553         941 $self->{byId}{$atom->id} = $atom;
108 553         1042 $atom->parent($self);
109             }
110 552         1132 $_[-1];
111             }
112              
113             sub add_atom_np {
114 0     0 0 0 my $self = shift;
115 0         0 for my $atom (@_){
116 0         0 push @{$self->{atoms}}, $atom;
  0         0  
117 0         0 $self->{byId}{$atom->id} = $atom;
118             }
119 0         0 $_[-1];
120             }
121              
122             =item $mol->atom_class
123              
124             Returns the atom class that a molecule or molecule class expects to use by
125             default. L objects return "Chemistry::Atom", but subclasses
126             will likely override this method.
127              
128             =cut
129              
130             sub atom_class {
131 14     14 1 46 "Chemistry::Atom";
132             }
133              
134             =item $mol->new_atom(name => value, ...)
135              
136             Shorthand for C<< $mol->add_atom($mol->atom_class->new(name => value, ...)) >>.
137              
138             =cut
139              
140             sub new_atom {
141 14     14 1 61 my $self = shift;
142 14         34 $self->add_atom($self->atom_class->new(@_));
143             }
144              
145             =item $mol->delete_atom($atom, ...)
146              
147             Deletes an atom from the molecule. It automatically deletes all the bonds in
148             which the atom participates as well. $atom should be a Chemistry::Atom
149             reference. This method also accepts the atom index, but this use is deprecated
150             (and buggy if multiple indices are given, unless they are in descending order).
151              
152             =cut
153              
154             sub delete_atom {
155 2     2 1 4 my $self = shift;
156 2         5 for my $i (@_) {
157 2         4 my ($atom);
158 2 100       6 if (ref $i) {
159 1         2 $atom = $i;
160             } else {
161 1 50       4 $atom = $self->atoms($i)
162             or croak "$self->delete_atom: no such atom $i\n";
163             }
164 2         8 $atom->delete($i);
165             }
166             }
167              
168             # takes an atom ref to delete and optionally the atom index
169             # 1) deletes bonds that belonged to atom
170             # 2) deletes atom
171             sub _delete_atom {
172 7     7   20 my ($self, $atom) = @_;
173 7 50       19 my $index = $self->get_atom_index($atom)
174             or croak "$self->delete_atom: no such atom $atom\n";
175 7         71 my $id = $atom->id;
176 7         40 $self->delete_bond($atom->bonds);
177 7         39 delete $self->{byId}{$id};
178 7         22 splice @{$self->{atoms}}, $index - 1, 1;
  7         28  
179             }
180              
181             =item $mol->add_bond($bond, ...)
182              
183             Add one or more Bond objects to the molecule. Returns the last bond added.
184              
185             =cut
186              
187             sub add_bond {
188 25     25 1 38 my $self = shift;
189 25         40 for my $bond (@_){
190             #if ($self->by_id($bond->id)) {
191             #croak "Duplicate ID when adding bond '$bond' to mol '$self'";
192             #}
193 25         30 push @{$self->{bonds}}, $bond;
  25         47  
194 25         61 $self->{byId}{$bond->id} = $bond;
195 25 100       56 if ($bond->{deleted}) {
196 1         2 $_->add_bond($bond) for $bond->atoms;
197 1         3 $bond->{deleted} = 0;
198             }
199 25         57 $bond->parent($self);
200             }
201 25         64 $_[-1];
202             }
203              
204             sub add_bond_np {
205 0     0 0 0 my $self = shift;
206 0         0 for my $bond (@_){
207 0         0 push @{$self->{bonds}}, $bond;
  0         0  
208 0         0 $self->{byId}{$bond->id} = $bond;
209             }
210 0         0 $_[-1];
211             }
212              
213             =item $mol->bond_class
214              
215             Returns the bond class that a molecule or molecule class expects to use by
216             default. L objects return "Chemistry::Bond", but subclasses
217             will likely override this method.
218              
219             =cut
220              
221             sub bond_class {
222 8     8 1 55 "Chemistry::Bond";
223             }
224              
225             =item $mol->new_bond(name => value, ...)
226              
227             Shorthand for C<< $mol->add_bond($mol->bond_class->new(name => value, ...)) >>.
228              
229             =cut
230              
231             sub new_bond {
232 8     8 1 28 my $self = shift;
233 8         28 $self->add_bond($self->bond_class->new(@_));
234             }
235              
236             sub get_bond_index {
237 14     14 0 29 my ($self, $bond) = @_;
238 14         18 my $i;
239 14         31 for ($self->bonds) {
240 42         50 ++$i;
241 42 100       74 return $i if ($_ eq $bond);
242             }
243 0         0 undef;
244             }
245              
246             sub get_atom_index {
247 7     7 0 12 my ($self, $atom) = @_;
248 7         11 my $i;
249 7         16 for ($self->atoms) {
250 12         19 ++$i;
251 12 100       32 return $i if ($_ eq $atom);
252             }
253 0         0 undef;
254             }
255              
256             =item $mol->delete_bond($bond, ...)
257              
258             Deletes a bond from the molecule. $bond should be a L object.
259              
260             =cut
261              
262             # mol deletes bond
263             # bond tells atoms involved to forget about it
264              
265             sub delete_bond {
266 7     7 1 14 my $self = shift;
267 7         15 for my $i (@_){
268 11         13 my ($bond);
269 11 50       24 if (ref $i) {
270 11         18 $bond = $i;
271             } else {
272 0 0       0 $bond = $self->bonds($i)
273             or croak "$self->delete_bond($i): no such bond $i\n";
274             }
275 11         29 $bond->delete;
276             }
277             }
278              
279             sub _delete_bond {
280 14     14   29 my ($self, $bond) = @_;
281 14 50       35 my $index = $self->get_bond_index($bond)
282             #or croak "$self->delete_bond: no such bond $bond\n";
283             or return;
284 14         42 my $id = $bond->id;
285 14         30 delete $self->{byId}{$id};
286 14         22 splice @{$self->{bonds}}, $index - 1, 1;
  14         38  
287 14         37 $bond->delete_atoms;
288             }
289              
290             =item $mol->by_id($id)
291              
292             Return the atom or bond object with the corresponding id.
293              
294             =cut
295              
296             sub by_id {
297 3     3 1 6 my $self = shift;
298 3         7 my ($id) = @_;
299 3         14 $self->{byId}{$id};
300             }
301              
302             sub _change_id {
303 4     4   9 my ($self, $old_id, $new_id) = @_;
304 4         7 my $ref = $self->{byId}{$old_id};
305 4         9 $self->{byId}{$new_id} = $ref;
306 4         10 delete $self->{byId}{$old_id};
307             }
308              
309             =item $mol->atoms($n1, ...)
310              
311             Returns the atoms with the given indices, or all by default.
312             Indices start from one, not from zero.
313              
314             =cut
315              
316             sub atoms {
317 90     90 1 8230 my $self = shift;
318 90 100       214 if (@_) {
319 18         48 my @ats = map {$_ - 1} @_;
  24         68  
320 18         32 @{$self->{atoms}}[@ats];
  18         84  
321             } else {
322 72         88 @{$self->{atoms}};
  72         383  
323             }
324             }
325              
326             =item $mol->atoms_by_name($name)
327              
328             Returns the atoms with the given name (treated as an anchored regular
329             expression).
330              
331             =cut
332              
333             sub atoms_by_name {
334 1     1 1 3 my $self = shift;
335 1         28 my $re = qr/^$_[0]$/;
336 16     16   179 no warnings;
  16         36  
  16         7112  
337 1         5 my @ret = grep {$_->name =~ $re} $self->atoms;
  2         8  
338 1 50       8 wantarray ? @ret : $ret[0];
339             }
340              
341             =item $mol->sort_atoms($sub_ref)
342              
343             Sort the atoms in the molecule by using the comparison function given in
344             $sub_ref. This function should take two atoms as parameters and return -1, 0,
345             or 1 depending on whether the first atom should go before, same, or after the
346             second atom. For example, to sort by atomic number, you could use the
347             following:
348              
349             $mol->sort_atoms( sub { $_[0]->Z <=> $_[1]->Z } );
350              
351             Note that the atoms are passed as parameters and not as the package variables
352             $a and $b like the core sort function does. This is because $mol->sort will
353             likely be called from another package and we don't want to play with another
354             package's symbol table.
355              
356             =cut
357              
358             sub sort_atoms {
359 0     0 1 0 my ($self, $sub) = @_;
360 0         0 my @a = $self->atoms;
361 0         0 @a = sort { $sub->($a,$b) } @a;
  0         0  
362 0         0 $self->{atoms} = \@a;
363 0         0 $self;
364             }
365              
366             =item $mol->bonds($n1, ...)
367              
368             Returns the bonds with the given indices, or all by default.
369             Indices start from one, not from zero.
370              
371             =cut
372              
373             sub bonds {
374 49     49 1 648 my $self = shift;
375 49 100       113 if (@_) {
376 6         15 my @bonds = map {$_ - 1} @_;
  6         24  
377 6         12 @{$self->{bonds}}[@bonds];
  6         33  
378             } else {
379 43         54 @{$self->{bonds}};
  43         155  
380             }
381             }
382              
383             =item $mol->print(option => value...)
384              
385             Convert the molecule to a string representation. If no options are given,
386             a default YAML-like format is used (this may change in the future). Otherwise,
387             the format should be specified by using the C option.
388              
389             =cut
390              
391             sub print {
392 19     19 1 349 my $self = shift;
393 19         42 my (%opts) = @_;
394 19         21 my $ret;
395 19         25 local $" = ""; #"
396              
397 19 50       39 if ($opts{format}) {
398 19         41 return $self->formats($opts{format})->write_string($self, %opts);
399             }
400             # else use default printout
401 0         0 $ret = <
402             $self->{id}:
403             name: $self->{name}
404             END
405 0         0 $ret .= " attr:\n";
406 0         0 $ret .= $self->print_attr(2);
407 0         0 $ret .= " atoms:\n";
408 0         0 for my $a (@{$self->{atoms}}) { $ret .= $a->print(2) }
  0         0  
  0         0  
409 0         0 $ret .= " bonds:\n";
410 0         0 for my $b (@{$self->{bonds}}) { $ret .= $b->print(2) }
  0         0  
  0         0  
411 0         0 $ret;
412             }
413              
414             =item $s = $mol->sprintf($format)
415              
416             Format interesting molecular information in a concise way, as specified by
417             a printf-like format.
418              
419             %n - name
420             %f - formula
421             %f{formula with format} - (note: right braces within
422             the format should be escaped with a backslash)
423             %s - SMILES representation
424             %S - canonical SMILES representation
425             %m - mass
426             %8.3m - mass, formatted as %8.3f with core sprintf
427             %q - formal charge
428             %a - atom count
429             %b - bond count
430             %t - type
431             %i - id
432             %% - %
433              
434             For example, if you want just about everything:
435              
436             $mol->sprintf("%s - %n (%f). %a atoms, %b bonds; "
437             . "mass=%m; charge =%q; type=%t; id=%i");
438              
439             Note that you have to C before using C<%s> or
440             C<%S> on C<< $mol->sprintf >>.
441              
442             =cut
443              
444             sub sprintf {
445 0     0 1 0 my ($mol, $format) = @_;
446 16     16   116 no warnings 'uninitialized'; # don't care if some properties are undefined
  16         35  
  16         34072  
447 0   0     0 $format ||= "%f";
448 0         0 $format =~ s/%%/\\%/g; # escape %% with a \
449 0         0 $format =~ s/(?formula($1)/eg; # %f{}
  0         0  
450 0         0 $format =~ s/(?formula/eg; # %f
  0         0  
451 0         0 $format =~ s/(?print(format=>'smiles')/eg; # %s
  0         0  
452 0         0 $format =~ s/(?print(format=>'smiles', unique => 1)/eg; # %s
  0         0  
453 0         0 $format =~ s/(?name/eg; # %n
  0         0  
454 0         0 $format =~ s/(?
455 0 0       0 $1 ? sprintf "%$1f", $mol->mass : $mol->mass/eg; # %m
456 0         0 $format =~ s/(?charge/eg; # %q
  0         0  
457 0         0 $format =~ s/(?atoms/eg; # %a
  0         0  
458 0         0 $format =~ s/(?bonds/eg; # %b
  0         0  
459 0         0 $format =~ s/(?type/eg; # %t
  0         0  
460 0         0 $format =~ s/(?id/eg; # %i
  0         0  
461 0         0 $format =~ s/\\(.)/$1/g; # other \ escapes
462 0         0 $format;
463             }
464              
465             =item $mol->printf($format)
466              
467             Same as C<< $mol->sprintf >>, but prints to standard output automatically.
468             Used for quick and dirty molecular information dumping.
469              
470             =cut
471              
472             sub printf {
473 0     0 1 0 my ($mol, $format) = @_;
474 0         0 print $mol->sprintf($format);
475             }
476              
477             =item Chemistry::Mol->parse($string, option => value...)
478              
479             Parse the molecule encoded in C<$string>. The format should be specified
480             with the the C option; otherwise, it will be guessed.
481              
482             =cut
483              
484             sub parse {
485 14     14 1 8053 my $self = shift;
486 14         21 my $s = shift;
487 14         38 my %opts = (mol_class => $self, @_);
488              
489 14 50       33 if ($opts{format}) {
490 14         29 return $self->formats($opts{format})->parse_string($s, %opts);
491             } else {
492 0         0 croak "Parse does not support autodetection yet.",
493             "Please specify a format.";
494             }
495 0         0 return;
496             }
497              
498             =item Chemistry::Mol->read($fname, option => value ...)
499              
500             Read a file and return a list of Mol objects, or croaks if there was a problem.
501             The type of file will be guessed if not specified via the C option.
502              
503             Note that only registered file readers will be used. Readers may be registered
504             using C; modules that include readers (such as
505             L) usually register them automatically when they are
506             loaded.
507              
508             Automatic decompression of gzipped files is supported if the L
509             module is installed. Files ending in .gz are assumed to be compressed;
510             otherwise it is possible to force decompression by passing the gzip => 1
511             option (or no decompression with gzip => 0).
512              
513             =cut
514              
515             sub read_mol { # for backwards compatibility
516 0     0 0 0 my ($fname, $type) = shift;
517 0         0 __PACKAGE__->read($fname, format => $type);
518             }
519              
520             sub read {
521 11     11 1 51683 my $self = shift;
522 11         24 my $fname = shift;
523 11         43 my %opts = (mol_class => $self, @_);
524              
525 11 100       42 if ($opts{format}) {
526 3         12 return $self->formats($opts{format})->parse_file($fname, %opts);
527             } else { # guess format
528 8         31 for my $type ($self->formats) {
529 8 50       22 if ($self->formats($type)->file_is($fname)) {
530 8         26 return $self->formats($type)->parse_file($fname, %opts);
531             }
532             }
533             }
534 0         0 croak "Couldn't guess format of file '$fname'";
535             }
536              
537             =item $mol->write($fname, option => value ...)
538              
539             Write a molecule file, or croak if there was a problem. The type of file will
540             be guessed if not specified via the C option.
541              
542             Note that only registered file formats will be used.
543              
544             Automatic gzip compression is supported if the IO::Zlib module is installed.
545             Files ending in .gz are assumed to be compressed; otherwise it is possible to
546             force compression by passing the gzip => 1 option (or no compression with gzip
547             => 0). Specific compression levels between 2 (fastest) and 9 (most compressed)
548             may also be used (e.g., gzip => 9).
549              
550             =cut
551              
552             sub write {
553 3     3 1 572 my ($self, $fname, %opts) = (@_);
554              
555 3 100       11 if ($opts{format}) {
556 2         7 return $self->formats($opts{format})->write_file(@_);
557             } else { # guess format
558 1         4 for my $type ($self->formats) {
559 1 50       3 if ($self->formats($type)->name_is($fname)) {
560 1         5 return $self->formats($type)->write_file(@_);
561             }
562             }
563             }
564 0         0 croak "Couldn't guess format for writing file '$fname'";
565             }
566              
567             =item Chemistry::Mol->file($file, option => value ...)
568              
569             Create a L-derived object for reading or writing to a file.
570             The object can then be used to read the molecules or other information in the
571             file.
572              
573             This has more flexibility than calling C<< Chemistry::Mol->read >> when
574             dealing with multi-molecule files or files that have higher structure or that
575             have information that does not belong to the molecules themselves. For
576             example, a reaction file may have a list of molecules, but also general
577             information like the reaction name, yield, etc. as well as the classification
578             of the molecules as reactants or products. The exact information that is
579             available will depend on the file reader class that is being used. The
580             following is a hypothetical example for reading MDL rxnfiles.
581              
582             # assuming this module existed...
583             use Chemistry::File::Rxn;
584              
585             my $rxn = Chemistry::Mol->file('test.rxn');
586             $rxn->read;
587             $name = $rxn->name;
588             @reactants = $rxn->reactants; # mol objects
589             @products = $rxn->products;
590             $yield = $rxn->yield; # a number
591              
592             Note that only registered file readers will be used. Readers may be registered
593             using register_format(); modules that include readers (such as
594             Chemistry::File::PDB) usually register them automatically.
595              
596             =cut
597              
598             sub file {
599 1     1 1 4 my ($self, $file, %opts) = @_;
600 1         6 %opts = (mol_class => $self, %opts);
601              
602 1 50       5 if ($opts{format}) {
603 0         0 return $self->formats($opts{format})->new(file => $file,
604             opts => \%opts);
605             } else { # guess format
606 1         5 for my $type ($self->formats) {
607 1 50       5 if ($self->formats($type)->file_is($file)) {
608 1         15 return $self->formats($type)->new(file => $file,
609             opts => \%opts);
610             }
611             }
612             }
613 0         0 croak "Couldn't guess format of file '$file'";
614             }
615              
616             =item Chemistry::Mol->register_format($name, $ref)
617              
618             Register a file type. The identifier $name must be unique. $ref is either a
619             class name (a package) or an object that complies with the L
620             interface (e.g., a subclass of Chemistry::File). If $ref is omitted, the
621             calling package is used automatically. More than one format can be registered
622             at a time, but then $ref must be included for each format (e.g.,
623             Chemistry::Mol->register_format(format1 => "package1", format2 => package2).
624              
625             The typical user doesn't have to care about this function. It is used
626             automatically by molecule file I/O modules.
627              
628             =cut
629              
630             sub register_format {
631 14     14 1 36 my $class = shift;
632 14 100       83 if (@_ == 1) {
633 3         9 $FILE_FORMATS{$_[0]} = caller;
634 3         8 return;
635             }
636 11         67 my %opts = @_;
637 11         90 $FILE_FORMATS{$_} = $opts{$_} for keys %opts;
638             }
639              
640             =item Chemistry::Mol->formats
641              
642             Returns a list of the file formats that have been installed by
643             register_format()
644              
645             =cut
646              
647             sub formats {
648 68     68 1 95 my $self = shift;
649 68 100       131 if (@_) {
650 58         95 my ($type) = @_;
651 58         91 my $file_class = $FILE_FORMATS{$type};
652 58 50       117 unless ($file_class) {
653 0         0 croak "No class installed for type '$type'";
654             }
655 58         368 return $file_class;
656             } else {
657 10         61 return sort keys %FILE_FORMATS;
658             }
659             }
660              
661             =item $mol->mass
662              
663             Return the molar mass. This is just the sum of the masses of the atoms. See
664             L::mass for details such as the handling of isotopes.
665              
666             =cut
667              
668             sub mass {
669 2     2 1 9 my ($self) = @_;
670 2         5 my $mass = 0;
671 2         5 for my $atom ($self->atoms) {
672 6         12 $mass += $atom->mass;
673             }
674 2         13 $mass;
675             }
676              
677             =item $mol->charge
678              
679             Return the charge of the molecule. By default it returns the sum of the formal
680             charges of the atoms. However, it is possible to set an arbitrary charge by
681             calling C<< $mol->charge($new_charge) >>
682              
683             =cut
684              
685             sub charge {
686 0     0 1 0 my ($self) = shift;
687 0 0       0 if (@_) {
688 0         0 $self->{charge} = shift;
689 0         0 $self;
690             } else {
691 0 0       0 return $self->{charge} if defined $self->{charge};
692 0         0 my $charge = 0;
693 0   0     0 $charge += $_->formal_charge || 0 for $self->atoms;
694 0         0 $charge;
695             }
696             }
697              
698             =item $mol->formula_hash
699              
700             Returns a hash reference describing the molecular formula. For methane it would
701             return { C => 1, H => 4 }.
702              
703             =cut
704              
705             sub formula_hash {
706 17     17 1 25 my ($self) = @_;
707 17         26 my $formula = {};
708 17         26 for my $atom ($self->atoms) {
709 538         782 $formula->{$atom->symbol}++;
710 538 50       756 $formula->{H} += $atom->hydrogens if $atom->hydrogens;
711             }
712 17         37 $formula;
713             }
714              
715             =item $mol->formula($format)
716              
717             Returns a string with the formula. The format can be specified as a printf-like
718             string with the control sequences specified in the L
719             documentation.
720              
721             =cut
722              
723             sub formula {
724 5     5 1 1562 my ($self, $format) = @_;
725 5         406 require Chemistry::File::Formula;
726 5         15 $self->print(format => "formula", formula_format => $format);
727             }
728              
729             =item my $mol2 = $mol->clone;
730              
731             Makes a copy of a molecule. Note that this is a B copy; if your molecule
732             has a pointer to the rest of the universe, the entire universe will be cloned!
733              
734             By default, clone() uses L to copy the Perl data structure. L
735             can be used instead by setting variable C<$Chemistry::Mol::clone_backend> to
736             C (default is C). The documentation of Storable claims L
737             is less memory-intensive.
738              
739             =cut
740              
741             sub clone {
742 9     9 1 754 my ($self) = @_;
743 9         17 my $clone;
744 9 100       26 if ($clone_backend eq "Storable") {
    50          
745 8         1309 $clone = dclone $self;
746 8 50       116 $clone->_weaken if Storable->VERSION < 2.14;
747             } elsif ($clone_backend eq "Clone") {
748 1         383 require Clone;
749 1         2150 $clone = Clone::clone $self;
750             } else {
751 0         0 croak "Unknown clone backend '$clone_backend'";
752             }
753 9         38 $clone;
754             }
755              
756             =item my $mol2 = $mol->safe_clone;
757              
758             Like clone, it makes a deep copy of a molecule. The difference is that the copy
759             is not "exact" in that new molecule and its atoms and bonds get assigned new
760             IDs. This makes it safe to combine cloned molecules. For example, this is an
761             error:
762              
763             # XXX don't try this at home!
764             my $mol2 = Chemistry::Mol->combine($mol1, $mol1);
765             # the atoms in $mol1 will clash
766              
767             But this is ok:
768              
769             # the "safe clone" of $mol1 will have new IDs
770             my $mol2 = Chemistry::Mol->combine($mol1, $mol1->safe_clone);
771              
772             =cut
773              
774             sub safe_clone {
775 1     1 1 2 my ($mol) = @_;
776 1         5 my $clone = $mol->clone;
777 1         3 for ($clone, $clone->atoms, $clone->bonds) {
778 4         13 $_->id($_->nextID);
779             }
780 1         21 $clone;
781             }
782              
783             sub _weaken {
784 14     14   45 my ($self) = @_;
785 14         59 for ($self->atoms, $self->bonds) {
786 196         358 $_->_weaken;
787             }
788 14         33 $self;
789             }
790              
791             =item ($distance, $atom_here, $atom_there) = $mol->distance($obj)
792              
793             Returns the minimum distance to $obj, which can be an atom, a molecule, or a
794             vector. In scalar context it returns only the distance; in list context it
795             also returns the atoms involved. The current implementation for calculating
796             the minimum distance between two molecules compares every possible pair of
797             atoms, so it's not efficient for large molecules.
798              
799             =cut
800              
801             sub distance {
802 0     0 1 0 my ($self, $other) = @_;
803 0 0       0 if ($other->isa("Chemistry::Mol")) {
    0          
    0          
804 0         0 my @atoms = $self->atoms;
805 0 0       0 my $atom = shift @atoms or return; # need at least one atom
806 0         0 my $closest_here = $atom;
807 0         0 my ($min_length, $closest_there) = $atom->distance($other);
808 0         0 for $atom (@atoms) {
809 0         0 my ($d, $o) = $atom->distance($other);
810 0 0       0 if ($d < $min_length) {
811 0         0 ($min_length, $closest_there, $closest_here) = ($d, $o, $atom);
812             }
813             }
814             return wantarray ?
815 0 0       0 ($min_length, $closest_here, $closest_there) : $min_length;
816             } elsif ($other->isa("Chemistry::Atom")) {
817 0         0 return $other->distance($self);
818             } elsif ($other->isa("Math::VectorReal")) {
819 0         0 return Chemistry::Atom->new(coords => $other)->distance($self);
820             }
821             }
822              
823             =item my $bigmol = Chemistry::Mol->combine($mol1, $mol2, ...)
824              
825             =item $mol1->combine($mol2, $mol3, ...)
826              
827             Combines several molecules in one bigger molecule. If called as a class method,
828             as in the first example, it returns a new combined molecule without altering
829             any of the parameters. If called as an instance method, as in the second
830             example, all molecules are combined into $mol1 (but $mol2, $mol3, ...) are not
831             altered. B: Make sure you don't combine molecules which contain atoms
832             with duplicate IDs (for example, if they were cloned).
833              
834             =cut
835              
836             # joins several molecules into one
837             sub combine {
838 2     2 1 519 my ($self, @others) = @_;
839 2         4 my $mol;
840 2 100       5 if (ref $self) {
841 1         3 $mol = $self;
842             } else {
843 1         3 $mol = $self->new;
844             }
845 2         8 for my $other (@others) {
846 3         7 my $mol2 = $other->clone;
847 3         7 for my $atom ($mol2->atoms) {
848 12         18 $mol->add_atom($atom);
849             }
850 3         6 for my $bond ($mol2->bonds) {
851 9         15 $mol->add_bond($bond);
852             }
853             }
854 2         7 $mol;
855             }
856              
857             =item my @mols = $mol->separate
858              
859             Separates a molecule into "connected fragments". The original object is not
860             modified; the fragments are clones of the original ones. Example: if you have
861             ethane (H3CCH3) and you delete the C-C bond, you have two CH3 radicals within
862             one molecule object ($mol). When you call $mol->separate you get two molecules,
863             each one with a CH3.
864              
865             =cut
866              
867             # splits a molecule into connected fragments
868             # returns a list of molecules. Does not touch the original copy.
869             sub separate {
870 1     1 1 546 my ($self) = @_;
871 1         12 $self = $self->clone;
872 1         2 $self->{_paint_tab} = {};
873 1         2 my $color = 0;
874 1         3 for my $atom ($self->atoms) {
875 8 100       17 next if defined $self->{_paint_tab}{$atom->id};
876 2         6 $self->_paint($atom, $color++);
877             }
878 1         4 my @mols;
879 1         6 push @mols, $self->new for (1 .. $color);
880 1         6 for my $atom ($self->atoms) {
881 8         16 $mols[$self->{_paint_tab}{$atom->id}]->add_atom($atom);
882             }
883 1         2 for my $bond ($self->bonds) {
884 6         9 $mols[$self->{_paint_tab}{$bond->id}]->add_bond($bond);
885             }
886 1         8 @mols;
887             }
888              
889             # this method fills the _paint_tab attribute for every atom connected
890             # to the given start atom $atom with $color. Used for separating
891             # connected fragments. Uses a depth-first search
892             sub _paint {
893 14     14   20 my ($self, $atom, $color) = @_;
894 14 100       47 return if defined $self->{_paint_tab}{$atom->id};
895 8         19 $self->{_paint_tab}{$atom->id} = $color;
896 8         16 $self->{_paint_tab}{$_->id} = $color for ($atom->bonds);
897 8         16 for my $neighbor ($atom->neighbors) {
898 12         20 $self->_paint($neighbor, $color);
899             }
900             }
901              
902             =item $mol->sprout_hydrogens
903              
904             Convert all the implicit hydrogen atoms in the molecule to explicit atoms.
905             It does B generate coordinates for the atoms.
906              
907             =cut
908              
909             sub sprout_hydrogens {
910 1     1 1 3 my ($self) = @_;
911 1         4 $_->sprout_hydrogens for $self->atoms;
912             }
913              
914             =item $mol->collapse_hydrogens
915              
916             Convert all the explicit hydrogen atoms in the molecule to implicit hydrogens.
917             (Exception: hydrogen atoms that are adjacent to a hydrogen atom are not
918             collapsed.)
919              
920             =cut
921              
922             sub collapse_hydrogens {
923 1     1 1 4 my ($self) = @_;
924 1         4 for my $atom (grep { $_->symbol ne 'H' } $self->atoms) {
  3         11  
925 1         14 $atom->collapse_hydrogens;
926             }
927             }
928              
929             =item $mol->add_implicit_hydrogens
930              
931             Use heuristics to figure out how many implicit hydrogens should each atom in
932             the molecule have to satisfy its normal "organic" valence.
933              
934             =cut
935              
936             sub add_implicit_hydrogens {
937 1     1 1 5 my ($self) = @_;
938 1         3 $_->add_implicit_hydrogens for $self->atoms;
939             }
940              
941              
942             my %DESCRIPTORS = ();
943              
944             =item Chemistry::Mol->register_descriptor($name => $sub_ref)
945              
946             Adds a callback that can be used to add functionality to the molecule class
947             (originally meant to add custom molecule descriptors.) A descriptor is a
948             function that takes a molecule object as its only argument and returns a value
949             or values. For example, to add a descriptor function that computes the number
950             of atoms:
951              
952             Chemistry::Mol->register_descriptor(
953             number_of_atoms => sub {
954             my $mol = shift;
955             return scalar $mol->atoms;
956             }
957             );
958              
959             The descriptor is accessed by name via the C instance method:
960              
961             my $n = $mol->descriptor('number_of_atoms');
962              
963             =cut
964              
965             sub register_descriptor {
966 1     1 1 450 my ($self, %opts) = @_;
967 1         7 $DESCRIPTORS{$_} = $opts{$_} for keys %opts;
968             }
969              
970             =item my $value = $mol->descriptor($descriptor_name)
971              
972             Calls a previously registered descriptor function giving it $mol as an
973             argument, as shown above for C.
974              
975             =cut
976              
977             sub descriptor {
978 1     1 1 7 my ($self, $descriptor) = @_;
979 1 50       4 my $sub = $DESCRIPTORS{$descriptor}
980             or croak "unknown descriptor '$descriptor'";
981 1         5 return $sub->($self);
982             }
983              
984             1;
985              
986             =back
987              
988             =head1 SOURCE CODE REPOSITORY
989              
990             L
991              
992             =head1 SEE ALSO
993              
994             L, L, L,
995             L
996              
997             =head1 AUTHOR
998              
999             Ivan Tubert-Brohman Eitub@cpan.orgE
1000              
1001             =head1 COPYRIGHT
1002              
1003             Copyright (c) 2005 Ivan Tubert-Brohman. All rights reserved. This program is
1004             free software; you can redistribute it and/or modify it under the same terms as
1005             Perl itself.
1006              
1007             =cut
1008