File Coverage

blib/lib/Chemistry/Mol.pm

Criterion	Covered	Total	%
statement	240	334	71.8
branch	41	76	53.9
condition	2	7	28.5
subroutine	50	60	83.3
pod	37	45	82.2
total	370	522	70.8

line	stmt	bran	cond	sub	pod	time	code
1							package Chemistry::Mol;
2							$VERSION = '0.37';
3							# $Id: Mol.pm,v 1.50 2009/05/10 19:37:58 itubert Exp $
4
5							=head1 NAME
6
7							Chemistry::Mol - Molecule object toolkit
8
9							=head1 SYNOPSIS
10
11							use Chemistry::Mol;
12
13							$mol = Chemistry::Mol->new(id => "mol_id", name => "my molecule");
14							$c = $mol->new_atom(symbol => "C", coords => [0,0,0]);
15							$o = $mol->new_atom(symbol => "O", coords => [0,0,1.23]);
16							$mol->new_bond(atoms => [$c, $o], order => 3);
17
18							print $mol->print;
19
20							=head1 DESCRIPTION
21
22							This package, along with Chemistry::Atom and Chemistry::Bond, includes basic
23							objects and methods to describe molecules.
24
25							The core methods try not to enforce a particular convention. This means that
26							only a minimal set of attributes is provided by default, and some attributes
27							have very loosely defined meaning. This is because each program and file type
28							has different idea of what each concept (such as bond and atom type) means.
29							Bonds are defined as a list of atoms (typically two) with an arbitrary type.
30							Atoms are defined by a symbol and a Z, and may have 3D and internal coordinates
31							(2D coming soon).
32
33							=cut
34
35	17			17		208055	use 5.006;
	17					65
	17					717
36	17			17		119	use strict;
	17					30
	17					586
37	17			17		113	use warnings;
	17					35
	17					553
38	17			17		14771	use Chemistry::Atom;
	17					52
	17					1226
39	17			17		17087	use Chemistry::Bond;
	17					47
	17					489
40	17			17		293	use Carp;
	17					35
	17					1493
41	17			17		93	use base qw(Chemistry::Obj Exporter);
	17					30
	17					4017
42	17			17		47881	use Storable 'dclone';
	17					98601
	17					47434
43
44							our @EXPORT_OK = qw(read_mol);
45							our @EXPORT = ();
46							our %EXPORT_TAGS = (
47							all => [@EXPORT, @EXPORT_OK],
48							);
49
50
51
52							my %FILE_FORMATS = ();
53
54							=head1 METHODS
55
56							See also L for generic attributes.
57
58							=over 4
59
60							=item Chemistry::Mol->new(name => value, ...)
61
62							Create a new Mol object with the specified attributes.
63
64							$mol = Chemistry::Mol->new(id => 'm123', name => 'my mol')
65
66							is the same as
67
68							Chemistry::Mol->new()
69							$mol->id('m123')
70							$mol->name('my mol')
71
72							=cut
73
74							sub new {
75	26			26	1	1565	my $class = shift;
76	26					61	my %args = @_;
77	26		66			104	my $self = bless {
78							id => $class->nextID,
79							byId => {},
80							atoms => [],
81							bonds => [],
82							name => "",
83							}, ref $class \|\| $class;
84	26					137	$self->$_($args{$_}) for (keys %args);
85	26					92	return $self;
86							}
87
88							my $N = 0; # molecule ID counter
89	27			27	0	368	sub nextID { "mol".++$N; }
90	0			0	0	0	sub reset_id { $N = 0; }
91	0			0	0	0	sub next_id { $N = $_[1] }
92
93							=item $mol->add_atom($atom, ...)
94
95							Add one or more Atom objects to the molecule. Returns the last atom added.
96
97							=cut
98
99							sub add_atom {
100	552			552	1	1747	my $self = shift;
101	552					848	for my $atom (@_){
102							#if ($self->by_id($atom->id)) {
103							#croak "Duplicate ID when adding atom '$atom' to mol '$self'";
104							#}
105	553					757	push @{$self->{atoms}}, $atom;
	553					2199
106	553					2728	$self->{byId}{$atom->id} = $atom;
107	553					2628	$atom->parent($self);
108							}
109	552					2588	$_[-1];
110							}
111
112							sub add_atom_np {
113	0			0	0	0	my $self = shift;
114	0					0	for my $atom (@_){
115	0					0	push @{$self->{atoms}}, $atom;
	0					0
116	0					0	$self->{byId}{$atom->id} = $atom;
117							}
118	0					0	$_[-1];
119							}
120
121							=item $mol->atom_class
122
123							Returns the atom class that a molecule or molecule class expects to use by
124							default. L objects return "Chemistry::Atom", but subclasses
125							will likely override this method.
126
127							=cut
128
129							sub atom_class {
130	14			14	1	63	"Chemistry::Atom";
131							}
132
133							=item $mol->new_atom(name => value, ...)
134
135							Shorthand for C<< $mol->add_atom($mol->atom_class->new(name => value, ...)) >>.
136
137							=cut
138
139							sub new_atom {
140	14			14	1	80	my $self = shift;
141	14					36	$self->add_atom($self->atom_class->new(@_));
142							}
143
144							=item $mol->delete_atom($atom, ...)
145
146							Deletes an atom from the molecule. It automatically deletes all the bonds in
147							which the atom participates as well. $atom should be a Chemistry::Atom
148							reference. This method also accepts the atom index, but this use is deprecated
149							(and buggy if multiple indices are given, unless they are in descending order).
150
151							=cut
152
153							sub delete_atom {
154	2			2	1	5	my $self = shift;
155	2					6	for my $i (@_) {
156	2					4	my ($atom);
157	2	100				7	if (ref $i) {
158	1					3	$atom = $i;
159							} else {
160	1	50				4	$atom = $self->atoms($i)
161							or croak "$self->delete_atom: no such atom $i\n";
162							}
163	2					10	$atom->delete($i);
164							}
165							}
166
167							# takes an atom ref to delete and optionally the atom index
168							# 1) deletes bonds that belonged to atom
169							# 2) deletes atom
170							sub _delete_atom {
171	7			7		13	my ($self, $atom) = @_;
172	7	50				32	my $index = $self->get_atom_index($atom)
173							or croak "$self->delete_atom: no such atom $atom\n";
174	7					39	my $id = $atom->id;
175	7					31	$self->delete_bond($atom->bonds);
176	7					56	delete $self->{byId}{$id};
177	7					25	splice @{$self->{atoms}}, $index - 1, 1;
	7					41
178							}
179
180							=item $mol->add_bond($bond, ...)
181
182							Add one or more Bond objects to the molecule. Returns the last bond added.
183
184							=cut
185
186							sub add_bond {
187	25			25	1	45	my $self = shift;
188	25					43	for my $bond (@_){
189							#if ($self->by_id($bond->id)) {
190							#croak "Duplicate ID when adding bond '$bond' to mol '$self'";
191							#}
192	25					34	push @{$self->{bonds}}, $bond;
	25					55
193	25					107	$self->{byId}{$bond->id} = $bond;
194	25	100				106	if ($bond->{deleted}) {
195	1					4	$_->add_bond($bond) for $bond->atoms;
196	1					3	$bond->{deleted} = 0;
197							}
198	25					88	$bond->parent($self);
199							}
200	25					71	$_[-1];
201							}
202
203							sub add_bond_np {
204	0			0	0	0	my $self = shift;
205	0					0	for my $bond (@_){
206	0					0	push @{$self->{bonds}}, $bond;
	0					0
207	0					0	$self->{byId}{$bond->id} = $bond;
208							}
209	0					0	$_[-1];
210							}
211
212							=item $mol->bond_class
213
214							Returns the bond class that a molecule or molecule class expects to use by
215							default. L objects return "Chemistry::Bond", but subclasses
216							will likely override this method.
217
218							=cut
219
220							sub bond_class {
221	8			8	1	122	"Chemistry::Bond";
222							}
223
224							=item $mol->new_bond(name => value, ...)
225
226							Shorthand for C<< $mol->add_bond($mol->bond_class->new(name => value, ...)) >>.
227
228							=cut
229
230							sub new_bond {
231	8			8	1	22	my $self = shift;
232	8					29	$self->add_bond($self->bond_class->new(@_));
233							}
234
235							sub get_bond_index {
236	14			14	0	23	my ($self, $bond) = @_;
237	14					43	my $i;
238	14					37	for ($self->bonds) {
239	42					59	++$i;
240	42	100				99	return $i if ($_ eq $bond);
241							}
242	0					0	undef;
243							}
244
245							sub get_atom_index {
246	7			7	0	14	my ($self, $atom) = @_;
247	7					12	my $i;
248	7					21	for ($self->atoms) {
249	12					20	++$i;
250	12	100				41	return $i if ($_ eq $atom);
251							}
252	0					0	undef;
253							}
254
255							=item $mol->delete_bond($bond, ...)
256
257							Deletes a bond from the molecule. $bond should be a L object.
258
259							=cut
260
261							# mol deletes bond
262							# bond tells atoms involved to forget about it
263
264							sub delete_bond {
265	7			7	1	18	my $self = shift;
266	7					19	for my $i (@_){
267	11					17	my ($bond);
268	11	50				31	if (ref $i) {
269	11					15	$bond = $i;
270							} else {
271	0	0				0	$bond = $self->bonds($i)
272							or croak "$self->delete_bond($i): no such bond $i\n";
273							}
274	11					125	$bond->delete;
275							}
276							}
277
278							sub _delete_bond {
279	14			14		23	my ($self, $bond) = @_;
280	14	50				44	my $index = $self->get_bond_index($bond)
281							#or croak "$self->delete_bond: no such bond $bond\n";
282							or return;
283	14					63	my $id = $bond->id;
284	14					39	delete $self->{byId}{$id};
285	14					24	splice @{$self->{bonds}}, $index - 1, 1;
	14					39
286	14					64	$bond->delete_atoms;
287							}
288
289							=item $mol->by_id($id)
290
291							Return the atom or bond object with the corresponding id.
292
293							=cut
294
295							sub by_id {
296	3			3	1	7	my $self = shift;
297	3					6	my ($id) = @_;
298	3					20	$self->{byId}{$id};
299							}
300
301							sub _change_id {
302	4			4		118	my ($self, $old_id, $new_id) = @_;
303	4					11	my $ref = $self->{byId}{$old_id};
304	4					13	$self->{byId}{$new_id} = $ref;
305	4					18	delete $self->{byId}{$old_id};
306							}
307
308							=item $mol->atoms($n1, ...)
309
310							Returns the atoms with the given indices, or all by default.
311							Indices start from one, not from zero.
312
313							=cut
314
315							sub atoms {
316	90			90	1	4773	my $self = shift;
317	90	100				266	if (@_) {
318	18					41	my @ats = map {$_ - 1} @_;
	24					76
319	18					34	@{$self->{atoms}}[@ats];
	18					121
320							} else {
321	72					92	@{$self->{atoms}};
	72					453
322							}
323							}
324
325							=item $mol->atoms_by_name($name)
326
327							Returns the atoms with the given name (treated as an anchored regular
328							expression).
329
330							=cut
331
332							sub atoms_by_name {
333	1			1	1	3	my $self = shift;
334	1					20	my $re = qr/^$_[0]$/;
335	17			17		226	no warnings;
	17					43
	17					10043
336	1					5	my @ret = grep {$_->name =~ $re} $self->atoms;
	2					8
337	1	50				10	wantarray ? @ret : $ret[0];
338							}
339
340							=item $mol->sort_atoms($sub_ref)
341
342							Sort the atoms in the molecule by using the comparison function given in
343							$sub_ref. This function should take two atoms as parameters and return -1, 0,
344							or 1 depending on whether the first atom should go before, same, or after the
345							second atom. For example, to sort by atomic number, you could use the
346							following:
347
348							$mol->sort_atoms( sub { $_[0]->Z <=> $_[1]->Z } );
349
350							Note that the atoms are passed as parameters and not as the package variables
351							$a and $b like the core sort function does. This is because $mol->sort will
352							likely be called from another package and we don't want to play with another
353							package's symbol table.
354
355							=cut
356
357							sub sort_atoms {
358	0			0	1	0	my ($self, $sub) = @_;
359	0					0	my @a = $self->atoms;
360	0					0	@a = sort { $sub->($a,$b) } @a;
	0					0
361	0					0	$self->{atoms} = \@a;
362	0					0	$self;
363							}
364
365							=item $mol->bonds($n1, ...)
366
367							Returns the bonds with the given indices, or all by default.
368							Indices start from one, not from zero.
369
370							=cut
371
372							sub bonds {
373	49			49	1	531	my $self = shift;
374	49	100				507	if (@_) {
375	6					16	my @bonds = map {$_ - 1} @_;
	6					24
376	6					14	@{$self->{bonds}}[@bonds];
	6					44
377							} else {
378	43					60	@{$self->{bonds}};
	43					181
379							}
380							}
381
382							=item $mol->print(option => value...)
383
384							Convert the molecule to a string representation. If no options are given,
385							a default YAML-like format is used (this may change in the future). Otherwise,
386							the format should be specified by using the C option.
387
388							=cut
389
390							sub print {
391	19			19	1	145	my $self = shift;
392	19					58	my (%opts) = @_;
393	19					26	my $ret;
394	19					31	local $" = ""; #"
395
396	19	50				126	if ($opts{format}) {
397	19					57	return $self->formats($opts{format})->write_string($self, %opts);
398							}
399							# else use default printout
400	0					0	$ret = <
401							$self->{id}:
402							name: $self->{name}
403							END
404	0					0	$ret .= " attr:\n";
405	0					0	$ret .= $self->print_attr(2);
406	0					0	$ret .= " atoms:\n";
407	0					0	for my $a (@{$self->{atoms}}) { $ret .= $a->print(2) }
	0					0
	0					0
408	0					0	$ret .= " bonds:\n";
409	0					0	for my $b (@{$self->{bonds}}) { $ret .= $b->print(2) }
	0					0
	0					0
410	0					0	$ret;
411							}
412
413							=item $s = $mol->sprintf($format)
414
415							Format interesting molecular information in a concise way, as specified by
416							a printf-like format.
417
418							%n - name
419							%f - formula
420							%f{formula with format} - (note: right braces within
421							the format should be escaped with a backslash)
422							%s - SMILES representation
423							%S - canonical SMILES representation
424							%m - mass
425							%8.3m - mass, formatted as %8.3f with core sprintf
426							%q - formal charge
427							%a - atom count
428							%b - bond count
429							%t - type
430							%i - id
431							%% - %
432
433							For example, if you want just about everything:
434
435							$mol->sprintf("%s - %n (%f). %a atoms, %b bonds; "
436							. "mass=%m; charge =%q; type=%t; id=%i");
437
438							Note that you have to C before using C<%s> or
439							C<%S> on C<< $mol->sprintf >>.
440
441							=cut
442
443							sub sprintf {
444	0			0	1	0	my ($mol, $format) = @_;
445	17			17		125	no warnings 'uninitialized'; # don't care if some properties are undefined
	17					37
	17					63099
446	0		0			0	$format \|\|= "%f";
447	0					0	$format =~ s/%%/\\%/g; # escape %% with a \
448	0					0	$format =~ s/(?formula($1)/eg; # %f{}
	0					0
449	0					0	$format =~ s/(?formula/eg; # %f
	0					0
450	0					0	$format =~ s/(?print(format=>'smiles')/eg; # %s
	0					0
451	0					0	$format =~ s/(?print(format=>'smiles', unique => 1)/eg; # %s
	0					0
452	0					0	$format =~ s/(?name/eg; # %n
	0					0
453	0					0	$format =~ s/(?
454	0	0				0	$1 ? sprintf "%$1f", $mol->mass : $mol->mass/eg; # %m
455	0					0	$format =~ s/(?charge/eg; # %q
	0					0
456	0					0	$format =~ s/(?atoms/eg; # %a
	0					0
457	0					0	$format =~ s/(?bonds/eg; # %b
	0					0
458	0					0	$format =~ s/(?type/eg; # %t
	0					0
459	0					0	$format =~ s/(?id/eg; # %i
	0					0
460	0					0	$format =~ s/\\(.)/$1/g; # other \ escapes
461	0					0	$format;
462							}
463
464							=item $mol->printf($format)
465
466							Same as C<< $mol->sprintf >>, but prints to standard output automatically.
467							Used for quick and dirty molecular information dumping.
468
469							=cut
470
471							sub printf {
472	0			0	1	0	my ($mol, $format) = @_;
473	0					0	print $mol->sprintf($format);
474							}
475
476							=item Chemistry::Mol->parse($string, option => value...)
477
478							Parse the molecule encoded in C<$string>. The format should be specified
479							with the the C option; otherwise, it will be guessed.
480
481							=cut
482
483							sub parse {
484	14			14	1	13839	my $self = shift;
485	14					26	my $s = shift;
486	14					62	my %opts = (mol_class => $self, @_);
487
488	14	50				41	if ($opts{format}) {
489	14					54	return $self->formats($opts{format})->parse_string($s, %opts);
490							} else {
491	0					0	croak "Parse does not support autodetection yet.",
492							"Please specify a format.";
493							}
494	0					0	return;
495							}
496
497							=item Chemistry::Mol->read($fname, option => value ...)
498
499							Read a file and return a list of Mol objects, or croaks if there was a problem.
500							The type of file will be guessed if not specified via the C option.
501
502							Note that only registered file readers will be used. Readers may be registered
503							using C; modules that include readers (such as
504							L) usually register them automatically when they are
505							loaded.
506
507							Automatic decompression of gzipped files is supported if the L
508							module is installed. Files ending in .gz are assumed to be compressed;
509							otherwise it is possible to force decompression by passing the gzip => 1
510							option (or no decompression with gzip => 0).
511
512							=cut
513
514							sub read_mol { # for backwards compatibility
515	0			0	0	0	my ($fname, $type) = shift;
516	0					0	__PACKAGE__->read($fname, format => $type);
517							}
518
519							sub read {
520	11			11	1	90902	my $self = shift;
521	11					27	my $fname = shift;
522	11					57	my %opts = (mol_class => $self, @_);
523
524	11	100				50	if ($opts{format}) {
525	3					18	return $self->formats($opts{format})->parse_file($fname, %opts);
526							} else { # guess format
527	8					41	for my $type ($self->formats) {
528	8	50				30	if ($self->formats($type)->file_is($fname)) {
529	8					31	return $self->formats($type)->parse_file($fname, %opts);
530							}
531							}
532							}
533	0					0	croak "Couldn't guess format of file '$fname'";
534							}
535
536							=item $mol->write($fname, option => value ...)
537
538							Write a molecule file, or croak if there was a problem. The type of file will
539							be guessed if not specified via the C option.
540
541							Note that only registered file formats will be used.
542
543							Automatic gzip compression is supported if the IO::Zlib module is installed.
544							Files ending in .gz are assumed to be compressed; otherwise it is possible to
545							force compression by passing the gzip => 1 option (or no compression with gzip
546							=> 0). Specific compression levels between 2 (fastest) and 9 (most compressed)
547							may also be used (e.g., gzip => 9).
548
549							=cut
550
551							sub write {
552	3			3	1	840	my ($self, $fname, %opts) = (@_);
553
554	3	100				15	if ($opts{format}) {
555	2					12	return $self->formats($opts{format})->write_file(@_);
556							} else { # guess format
557	1					5	for my $type ($self->formats) {
558	1	50				14	if ($self->formats($type)->name_is($fname)) {
559	1					6	return $self->formats($type)->write_file(@_);
560							}
561							}
562							}
563	0					0	croak "Couldn't guess format for writing file '$fname'";
564							}
565
566							=item Chemistry::Mol->file($file, option => value ...)
567
568							Create a L-derived object for reading or writing to a file.
569							The object can then be used to read the molecules or other information in the
570							file.
571
572							This has more flexibility than calling C<< Chemistry::Mol->read >> when
573							dealing with multi-molecule files or files that have higher structure or that
574							have information that does not belong to the molecules themselves. For
575							example, a reaction file may have a list of molecules, but also general
576							information like the reaction name, yield, etc. as well as the classification
577							of the molecules as reactants or products. The exact information that is
578							available will depend on the file reader class that is being used. The
579							following is a hypothetical example for reading MDL rxnfiles.
580
581							# assuming this module existed...
582							use Chemistry::File::Rxn;
583
584							my $rxn = Chemistry::Mol->file('test.rxn');
585							$rxn->read;
586							$name = $rxn->name;
587							@reactants = $rxn->reactants; # mol objects
588							@products = $rxn->products;
589							$yield = $rxn->yield; # a number
590
591							Note that only registered file readers will be used. Readers may be registered
592							using register_format(); modules that include readers (such as
593							Chemistry::File::PDB) usually register them automatically.
594
595							=cut
596
597							sub file {
598	1			1	1	4	my ($self, $file, %opts) = @_;
599	1					6	%opts = (mol_class => $self, %opts);
600
601	1	50				9	if ($opts{format}) {
602	0					0	return $self->formats($opts{format})->new(file => $file,
603							opts => \%opts);
604							} else { # guess format
605	1					5	for my $type ($self->formats) {
606	1	50				5	if ($self->formats($type)->file_is($file)) {
607	1					4	return $self->formats($type)->new(file => $file,
608							opts => \%opts);
609							}
610							}
611							}
612	0					0	croak "Couldn't guess format of file '$file'";
613							}
614
615							=item Chemistry::Mol->register_format($name, $ref)
616
617							Register a file type. The identifier $name must be unique. $ref is either a
618							class name (a package) or an object that complies with the L
619							interface (e.g., a subclass of Chemistry::File). If $ref is omitted, the
620							calling package is used automatically. More than one format can be registered
621							at a time, but then $ref must be included for each format (e.g.,
622							Chemistry::Mol->register_format(format1 => "package1", format2 => package2).
623
624							The typical user doesn't have to care about this function. It is used
625							automatically by molecule file I/O modules.
626
627							=cut
628
629							sub register_format {
630	15			15	1	21529	my $class = shift;
631	15	100				112	if (@_ == 1) {
632	4					20	$FILE_FORMATS{$_[0]} = caller;
633	4					18	return;
634							}
635	11					51	my %opts = @_;
636	11					105	$FILE_FORMATS{$_} = $opts{$_} for keys %opts;
637							}
638
639							=item Chemistry::Mol->formats
640
641							Returns a list of the file formats that have been installed by
642							register_format()
643
644							=cut
645
646							sub formats {
647	68			68	1	123	my $self = shift;
648	68	100				172	if (@_) {
649	58					87	my ($type) = @_;
650	58					117	my $file_class = $FILE_FORMATS{$type};
651	58	50				179	unless ($file_class) {
652	0					0	croak "No class installed for type '$type'";
653							}
654	58					554	return $file_class;
655							} else {
656	10					68	return sort keys %FILE_FORMATS;
657							}
658							}
659
660							=item $mol->mass
661
662							Return the molar mass. This is just the sum of the masses of the atoms. See
663							L::mass for details such as the handling of isotopes.
664
665							=cut
666
667							sub mass {
668	2			2	1	8	my ($self) = @_;
669	2					4	my $mass = 0;
670	2					6	for my $atom ($self->atoms) {
671	6					17	$mass += $atom->mass;
672							}
673	2					14	$mass;
674							}
675
676							=item $mol->charge
677
678							Return the charge of the molecule. By default it returns the sum of the formal
679							charges of the atoms. However, it is possible to set an arbitrary charge by
680							calling C<< $mol->charge($new_charge) >>
681
682							=cut
683
684							sub charge {
685	0			0	1	0	my ($self) = shift;
686	0	0				0	if (@_) {
687	0					0	$self->{charge} = shift;
688	0					0	$self;
689							} else {
690	0	0				0	return $self->{charge} if defined $self->{charge};
691	0					0	my $charge = 0;
692	0		0			0	$charge += $_->formal_charge \|\| 0 for $self->atoms;
693	0					0	$charge;
694							}
695							}
696
697							=item $mol->formula_hash
698
699							Returns a hash reference describing the molecular formula. For methane it would
700							return { C => 1, H => 4 }.
701
702							=cut
703
704							sub formula_hash {
705	17			17	1	73	my ($self) = @_;
706	17					26	my $formula = {};
707	17					56	for my $atom ($self->atoms) {
708	538					1444	$formula->{$atom->symbol}++;
709	538	50				1601	$formula->{H} += $atom->hydrogens if $atom->hydrogens;
710							}
711	17					214	$formula;
712							}
713
714							=item $mol->formula($format)
715
716							Returns a string with the formula. The format can be specified as a printf-like
717							string with the control sequences specified in the L
718							documentation.
719
720							=cut
721
722							sub formula {
723	5			5	1	1547	my ($self, $format) = @_;
724	5					1072	require Chemistry::File::Formula;
725	5					22	$self->print(format => "formula", formula_format => $format);
726							}
727
728							=item my $mol2 = $mol->clone;
729
730							Makes a copy of a molecule. Note that this is a B copy; if your molecule
731							has a pointer to the rest of the universe, the entire universe will be cloned!
732
733							=cut
734
735							sub clone {
736	8			8	1	15	my ($self) = @_;
737	8					2328	my $clone = dclone $self;
738	8	50				155	$clone->_weaken if Storable->VERSION < 2.14;
739	8					33	$clone;
740							}
741
742							=item my $mol2 = $mol->safe_clone;
743
744							Like clone, it makes a deep copy of a molecule. The difference is that the copy
745							is not "exact" in that new molecule and its atoms and bonds get assigned new
746							IDs. This makes it safe to combine cloned molecules. For example, this is an
747							error:
748
749							# XXX don't try this at home!
750							my $mol2 = Chemistry::Mol->combine($mol1, $mol1);
751							# the atoms in $mol1 will clash
752
753							But this is ok:
754
755							# the "safe clone" of $mol1 will have new IDs
756							my $mol2 = Chemistry::Mol->combine($mol1, $mol1->safe_clone);
757
758							=cut
759
760							sub safe_clone {
761	1			1	1	2	my ($mol) = @_;
762	1					3	my $clone = $mol->clone;
763	1					4	for ($clone, $clone->atoms, $clone->bonds) {
764	4					17	$_->id($_->nextID);
765							}
766	1					3	$clone;
767							}
768
769							sub _weaken {
770	14			14		36	my ($self) = @_;
771	14					70	for ($self->atoms, $self->bonds) {
772	196					581	$_->_weaken;
773							}
774	14					51	$self;
775							}
776
777							=item ($distance, $atom_here, $atom_there) = $mol->distance($obj)
778
779							Returns the minimum distance to $obj, which can be an atom, a molecule, or a
780							vector. In scalar context it returns only the distance; in list context it
781							also returns the atoms involved. The current implementation for calculating
782							the minimum distance between two molecules compares every possible pair of
783							atoms, so it's not efficient for large molecules.
784
785							=cut
786
787							sub distance {
788	0			0	1	0	my ($self, $other) = @_;
789	0	0				0	if ($other->isa("Chemistry::Mol")) {
		0
		0
790	0					0	my @atoms = $self->atoms;
791	0	0				0	my $atom = shift @atoms or return; # need at least one atom
792	0					0	my $closest_here = $atom;
793	0					0	my ($min_length, $closest_there) = $atom->distance($other);
794	0					0	for $atom (@atoms) {
795	0					0	my ($d, $o) = $atom->distance($other);
796	0	0				0	if ($d < $min_length) {
797	0					0	($min_length, $closest_there, $closest_here) = ($d, $o, $atom);
798							}
799							}
800							return wantarray ?
801	0	0				0	($min_length, $closest_here, $closest_there) : $min_length;
802							} elsif ($other->isa("Chemistry::Atom")) {
803	0					0	return $other->distance($self);
804							} elsif ($other->isa("Math::VectorReal")) {
805	0					0	return Chemistry::Atom->new(coords => $other)->distance($self);
806							}
807							}
808
809							=item my $bigmol = Chemistry::Mol->combine($mol1, $mol2, ...)
810
811							=item $mol1->combine($mol2, $mol3, ...)
812
813							Combines several molecules in one bigger molecule. If called as a class method,
814							as in the first example, it returns a new combined molecule without altering
815							any of the parameters. If called as an instance method, as in the second
816							example, all molecules are combined into $mol1 (but $mol2, $mol3, ...) are not
817							altered. B: Make sure you don't combine molecules which contain atoms
818							with duplicate IDs (for example, if they were cloned).
819
820							=cut
821
822							# joins several molecules into one
823							sub combine {
824	2			2	1	455	my ($self, @others) = @_;
825	2					4	my $mol;
826	2	100				6	if (ref $self) {
827	1					3	$mol = $self;
828							} else {
829	1					5	$mol = $self->new;
830							}
831	2					5	for my $other (@others) {
832	3					9	my $mol2 = $other->clone;
833	3					8	for my $atom ($mol2->atoms) {
834	12					59	$mol->add_atom($atom);
835							}
836	3					10	for my $bond ($mol2->bonds) {
837	9					24	$mol->add_bond($bond);
838							}
839							}
840	2					7	$mol;
841							}
842
843							=item my @mols = $mol->separate
844
845							Separates a molecule into "connected fragments". The original object is not
846							modified; the fragments are clones of the original ones. Example: if you have
847							ethane (H3CCH3) and you delete the C-C bond, you have two CH3 radicals within
848							one molecule object ($mol). When you call $mol->separate you get two molecules,
849							each one with a CH3.
850
851							=cut
852
853							# splits a molecule into connected fragments
854							# returns a list of molecules. Does not touch the original copy.
855							sub separate {
856	1			1	1	946	my ($self) = @_;
857	1					5	$self = $self->clone;
858	1					4	$self->{_paint_tab} = {};
859	1					3	my $color = 0;
860	1					6	for my $atom ($self->atoms) {
861	8	100				28	next if defined $self->{_paint_tab}{$atom->id};
862	2					10	$self->_paint($atom, $color++);
863							}
864	1					3	my @mols;
865	1					6	push @mols, $self->new for (1 .. $color);
866	1					4	for my $atom ($self->atoms) {
867	8					25	$mols[$self->{_paint_tab}{$atom->id}]->add_atom($atom);
868							}
869	1					5	for my $bond ($self->bonds) {
870	6					18	$mols[$self->{_paint_tab}{$bond->id}]->add_bond($bond);
871							}
872	1					19	@mols;
873							}
874
875							# this method fills the _paint_tab attribute for every atom connected
876							# to the given start atom $atom with $color. Used for separating
877							# connected fragments. Uses a depth-first search
878							sub _paint {
879	14			14		22	my ($self, $atom, $color) = @_;
880	14	100				34	return if defined $self->{_paint_tab}{$atom->id};
881	8					22	$self->{_paint_tab}{$atom->id} = $color;
882	8					21	$self->{_paint_tab}{$_->id} = $color for ($atom->bonds);
883	8					23	for my $neighbor ($atom->neighbors) {
884	12					25	$self->_paint($neighbor, $color);
885							}
886							}
887
888							=item $mol->sprout_hydrogens
889
890							Convert all the implicit hydrogen atoms in the molecule to explicit atoms.
891							It does B generate coordinates for the atoms.
892
893							=cut
894
895							sub sprout_hydrogens {
896	1			1	1	2	my ($self) = @_;
897	1					4	$_->sprout_hydrogens for $self->atoms;
898							}
899
900							=item $mol->collapse_hydrogens
901
902							Convert all the explicit hydrogen atoms in the molecule to implicit hydrogens.
903							(Exception: hydrogen atoms that are adjacent to a hydrogen atom are not
904							collapsed.)
905
906							=cut
907
908							sub collapse_hydrogens {
909	1			1	1	3	my ($self) = @_;
910	1					5	for my $atom (grep { $_->symbol ne 'H' } $self->atoms) {
	3					12
911	1					7	$atom->collapse_hydrogens;
912							}
913							}
914
915							=item $mol->add_implicit_hydrogens
916
917							Use heuristics to figure out how many implicit hydrogens should each atom in
918							the molecule have to satisfy its normal "organic" valence.
919
920							=cut
921
922							sub add_implicit_hydrogens {
923	1			1	1	7	my ($self) = @_;
924	1					5	$_->add_implicit_hydrogens for $self->atoms;
925							}
926
927
928							my %DESCRIPTORS = ();
929
930							=item Chemistry::Mol->register_descriptor($name => $sub_ref)
931
932							Adds a callback that can be used to add functionality to the molecule class
933							(originally meant to add custom molecule descriptors.) A descriptor is a
934							function that takes a molecule object as its only argument and returns a value
935							or values. For example, to add a descriptor function that computes the number
936							of atoms:
937
938							Chemistry::Mol->register_descriptor(
939							number_of_atoms => sub {
940							my $mol = shift;
941							return scalar $mol->atoms;
942							}
943							);
944
945							The descriptor is accessed by name via the C instance method:
946
947							my $n = $mol->descriptor('number_of_atoms');
948
949							=cut
950
951							sub register_descriptor {
952	1			1	1	74	my ($self, %opts) = @_;
953	1					8	$DESCRIPTORS{$_} = $opts{$_} for keys %opts;
954							}
955
956							=item my $value = $mol->descriptor($descriptor_name)
957
958							Calls a previously registered descriptor function giving it $mol as an
959							argument, as shown above for C.
960
961							=cut
962
963							sub descriptor {
964	1			1	1	3	my ($self, $descriptor) = @_;
965	1	50				6	my $sub = $DESCRIPTORS{$descriptor}
966							or croak "unknown descriptor '$descriptor'";
967	1					5	return $sub->($self);
968							}
969
970							1;
971
972							=back
973
974							=head1 VERSION
975
976							0.37
977
978							=head1 SEE ALSO
979
980							L, L, L,
981							L
982
983							The PerlMol website L
984
985							=head1 AUTHOR
986
987							Ivan Tubert-Brohman Eitub@cpan.orgE
988
989							=head1 COPYRIGHT
990
991							Copyright (c) 2005 Ivan Tubert-Brohman. All rights reserved. This program is
992							free software; you can redistribute it and/or modify it under the same terms as
993							Perl itself.
994
995							=cut
996