File Coverage

blib/lib/Chemistry/File/SMILES.pm

Criterion	Covered	Total	%
statement	244	258	94.5
branch	94	112	83.9
condition	41	53	77.3
subroutine	31	34	91.1
pod	5	22	22.7
total	415	479	86.6

line	stmt	bran	cond	sub	pod	time	code
1							package Chemistry::File::SMILES;
2
3							$VERSION = "0.47";
4							# $Id: SMILES.pm,v 1.16 2009/05/10 20:31:08 itubert Exp $
5
6	8			8		136676	use 5.006;
	8					30
	8					604
7	8			8		50	use strict;
	8					19
	8					775
8	8			8		46	use warnings;
	8					20
	8					373
9	8			8		43	no warnings 'recursion';
	8					21
	8					584
10	8			8		47	use base "Chemistry::File";
	8					15
	8					41531
11	8			8		429306	use Chemistry::Mol;
	8					725461
	8					1251
12	8			8		15014	use Chemistry::Bond::Find 'assign_bond_orders';
	8					248080
	8					1029
13	8			8		500	use List::Util 'first';
	8					17
	8					6518
14	8			8		54	use Carp;
	8					15
	8					21312
15
16
17							=head1 NAME
18
19							Chemistry::File::SMILES - SMILES linear notation parser/writer
20
21							=head1 SYNOPSYS
22
23							#!/usr/bin/perl
24							use Chemistry::File::SMILES;
25
26							# parse a SMILES string
27							my $s = 'C1CC1(=O)[O-]';
28							my $mol = Chemistry::Mol->parse($s, format => 'smiles');
29
30							# print a SMILES string
31							print $mol->print(format => 'smiles');
32
33							# print a unique (canonical) SMILES string
34							print $mol->print(format => 'smiles', unique => 1);
35
36							# parse a SMILES file
37							my @mols = Chemistry::Mol->read("file.smi", format => 'smiles');
38
39							# write a multiline SMILES file
40							Chemistry::Mol->write("file.smi", mols => \@mols);
41
42
43							=head1 DESCRIPTION
44
45							This module parses a SMILES (Simplified Molecular Input Line Entry
46							Specification) string. This is a File I/O driver for the PerlMol project.
47							L. It registers the 'smiles' format with
48							Chemistry::Mol.
49
50							This parser interprets anything after whitespace as the molecule's name;
51							for example, when the following SMILES string is parsed, $mol->name will be
52							set to "Methyl chloride":
53
54							CCl Methyl chloride
55
56							The name is not included by default on output. However, if the C option
57							is defined, the name will be included after the SMILES string, separated by a
58							tab.
59
60							print $mol->print(format => 'smiles', name => 1);
61
62							=head2 Multiline SMILES and SMILES files
63
64							A file or string can contain multiple molecules, one per line.
65
66							CCl Methyl chloride
67							CO Methanol
68
69							Files with the extension '.smi' are assumed to have this format.
70
71							=head2 Atom Mapping Numbers
72
73							As an extension for reaction processing, SMILES strings may have atom mapping
74							numbers, which are introduced after a colon in a bracketed atom. For example,
75							[C:1]. The mapping number need not be unique. This module reads the mapping
76							numbers and stores them as the name of the atom ($atom->name).
77
78							On output, atom names are not included by default. See the C and
79							C options below for ways of including them.
80
81							head1 OPTIONS
82
83							The following options are supported in addition to the options mentioned for
84							L, such as C, C, and C.
85
86							=over
87
88							=item aromatic
89
90							On output, detect aromatic atoms and bonds by means of the Chemistry::Ring
91							module, and represent the organic aromatic atoms with lowercase symbols.
92
93							=item unique
94
95							When used on output, canonicalize the structure if it hasn't been canonicalized
96							already and generate a unique SMILES string. This option implies "aromatic".
97
98							=item number
99
100							For atoms that have a defined name, print the name as the "atom number". For
101							example, if an ethanol molecule has the name "42" for the oxygen atom and the
102							other atoms have undefined names, the output would be:
103
104							CC[OH:42]
105
106							=item auto_number
107
108							When used on output, number all the atoms explicitly and sequentially. The
109							output for ethanol would look something like this:
110
111							[CH3:1][CH2:2][OH:3]
112
113							=item name
114
115							Include the molecule name on output, as described in the previous section.
116
117							=item kekulize
118
119							When used on input, assign single or double bond orders to "aromatic" or
120							otherwise unspecified bonds (i.e., generate the Kekule structure). If false,
121							the bond orders will remain single. This option is true by default. This uses
122							C from the L module.
123
124							=back
125
126							=cut
127
128							# INITIALIZATION
129							Chemistry::Mol->register_format('smiles');
130							my $Smiles_parser = __PACKAGE__->new_parser;
131
132							#=begin comment
133							#
134							#=over
135							#
136							#=cut
137
138							sub file_is {
139	0			0	1	0	my $self = shift;
140	0					0	$self->name_is(@_);
141							}
142
143							sub name_is {
144	0			0	1	0	my ($self, $name) = @_;
145	0					0	$name =~ /\.smi/;
146							}
147
148							sub slurp_mol {
149	0			0	1	0	my ($self, $fh) = @_;
150	0					0	scalar <$fh>;
151							}
152
153							sub read_mol {
154	111			111	1	89396	my ($self, $fh, %opts) = @_;
155	111					852	%opts = (kekulize => 1, %opts);
156	111		50			519	my $mol_class = $opts{mol_class} \|\| "Chemistry::Mol";
157
158	111					639	my $line = <$fh>;
159	111	100				568	return unless defined $line;
160	57					197	$line =~ tr/\r\n//d;
161	57					223	my ($smiles, $name) = split " ", $line, 2;
162
163	57					401	my $mol = $mol_class->new;
164	57	50				1620	unless ($Smiles_parser->parse($smiles, $mol, \%opts)) {
165	0					0	warn "error parsing SMILES line '$line'\n";
166	0					0	$mol = $mol_class->new;
167							}
168	57					565	$mol->name($name);
169	57					438	$self->add_implicit_hydrogens($mol);
170	57	100				627	if ($opts{kekulize}) {
171	56					355	assign_bond_orders($mol, method => "itub", use_coords => 0,
172							scratch => 0, charges => 0);
173							}
174	57					55748	$mol;
175							}
176
177
178							### The contents of the original Chemistry::Smiles module start below
179
180							my $Symbol = qr/
181							s\|p\|o\|n\|c\|b\|Zr\|Zn\|Yb\|Y\|Xe\|W\|V\|U\|Tm\|Tl\|Ti\|Th\|
182							Te\|Tc\|Tb\|Ta\|Sr\|Sn\|Sm\|Si\|Sg\|Se\|Sc\|Sb\|S\|Ru\|Rn\|Rh\|Rf\|Re\|Rb\|Ra\|
183							Pu\|Pt\|Pr\|Po\|Pm\|Pd\|Pb\|Pa\|P\|Os\|O\|Np\|No\|Ni\|Ne\|Nd\|Nb\|Na\|N\|Mt\|Mt\|
184							Mo\|Mn\|Mg\|Md\|Lu\|Lr\|Li\|La\|Kr\|K\|Ir\|In\|I\|Hs\|Hs\|Ho\|Hg\|Hf\|He\|H\|Ge\|
185							Gd\|Ga\|Fr\|Fm\|Fe\|F\|Eu\|Es\|Er\|Dy\|Ds\|Db\|Cu\|Cs\|Cr\|Co\|Cm\|Cl\|Cf\|Ce\|
186							Cd\|Ca\|C\|Br\|Bk\|Bi\|Bh\|Be\|Ba\|B\|Au\|At\|As\|Ar\|Am\|Al\|Ag\|Ac\|\*\|R\|X
187							/x; # Order is reverse alphabetical to ensure longest match
188
189							my $Simple_symbol = qr/Br\|Cl\|B\|C\|N\|O\|P\|S\|F\|I\|H\|s\|p\|o\|n\|c\|b/;
190
191							my $Bond = qr/(?:[-=#:.\/\\])?/;
192							my $Simple_atom = qr/($Simple_symbol)/; #3
193							my $Complex_atom = qr/
194							(?:
195							\[ #begin atom
196							(\d*) #4 isotope
197							($Symbol) #5 symbol
198							(\@{0,2}) #6 chirality
199							(?:(H\d*))? #7 H-count
200							(\+{2,}\|-{2,}\|\+\d\|-\d)? #8 charge
201							(?::(\d+))? #9 name
202							\] #end atom
203							)
204							/x;
205
206							my $Digits = qr/(?:($Bond)(?:\d\|%\d\d))*/;
207							my $Chain = qr/
208							\G( #1
209							(?:
210							($Bond) #2
211							(?:$Simple_atom\|$Complex_atom) #3-9
212							($Digits) #10
213							)
214							\|\(
215							\|\)
216							\|.+
217							)
218							/x;
219
220							my $digits_re = qr/($Bond)(\%\d\d\|\d)/;
221
222							my %type_to_order = (
223							'-' => 1,
224							'=' => 2,
225							'#' => 3,
226							'/' => 1,
227							'\\' => 1,
228							'' => 1, # not strictly true
229							'.' => 0,
230							);
231
232							my %ORGANIC_ELEMS = (
233							Br => 1, Cl => 1, B => 3, C => 4, N => 3, O => 2, P => 3, S => 2,
234							F => 1, I => 1, s => 1, p => 1, o => 1, n => 1, c => 1, b => 1,
235							);
236
237							#=item Chemistry::Smiles->new([add_atom => \&sub1, add_bond => \&sub2])
238							#
239							#Create a SMILES parser. If the add_atom and add_bond subroutine references
240							#are given, they will be called whenever an atom or a bond needs to be added
241							#to the molecule. If they are not specified, default methods, which
242							#create a Chemistry::Mol object, will be used.
243							#
244							#=cut
245
246							sub new_parser {
247	9			9	0	160	my $class = shift;
248	9					31	my %opts = @_;
249	9		100			165	my $self = bless {
			100
250							add_atom => $opts{add_atom} \|\| \&add_atom,
251							add_bond => $opts{add_bond} \|\| \&add_bond,
252							}, $class;
253							}
254
255							#=item $obj->parse($string, $mol)
256							#
257							#Parse a Smiles $string. $mol is a "molecule state object". It can be anything;
258							#the parser doesn't do anything with it except sending it as the first parameter
259							#to the callback functions. If callback functions were not provided when
260							#constructing the parser object, $mol must be a Chemistry::Mol object, because
261							#that's what the default callback functions require.
262							#
263							#=cut
264
265							sub parse {
266	64			64	0	1106	my ($self, $s, $mol, $opts) = @_;
267	64					362	$self->{stack} = [ undef ];
268	64					187	$self->{digits} = {};
269
270	64					131	eval {
271	64					2247	while ($s =~ /$Chain/g) {
272							#my @a = ($1, $2, $3, $4, $5, $6, $7, $8);
273							#print Dumper(\@a);
274	538					3429	my ($all, $bnd, $sym, $iso, $sym2, $chir, $hcnt, $chg, $name, $dig)
275							= ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);
276	538	100				1816	if ($all eq '(') {
		100
		100
		50
277	58					167	$self->start_branch();
278							} elsif ($all eq ')') {
279	58					169	$self->end_branch();
280							} elsif ($sym) { # Simple atom
281	8			8		415	no warnings;
	8					17
	8					810
282	409					1078	my @digs = parse_digits($dig);
283	409					1263	$self->atom($mol, $bnd, '', $sym, '', undef, '', \@digs);
284							} elsif ($sym2) { # Complex atom
285	8			8		43	no warnings;
	8					17
	8					12432
286	13					38	my @digs = parse_digits($dig);
287	13	100				38	if ($hcnt eq 'H') {
288	3					8	$hcnt = 1;
289							} else {
290	10					24	$hcnt =~ s/H//;
291							}
292	13	50				43	unless ($chg =~ /\d/) {
293	13	100				44	$chg = ($chg =~ /-/) ? -length($chg) : length($chg);
294							}
295	13		100			125	$self->atom($mol, $bnd, $iso, $sym2, $chir, $hcnt \|\| 0,
			100
296							$chg \|\| 0, \@digs, $name);
297							} else {
298	0					0	die "SMILES ERROR: '$all in $s'\n";
299							}
300							}
301							};
302							# clean up to avoid memory leak
303	64					194	$self->{stack} = undef;
304	64	50				191	if ($@) {
305	0	0				0	croak $@ if $opts->{fatal};
306	0					0	return;
307							}
308	64					288	$mol;
309							}
310
311							sub parse_digits {
312	422			422	0	592	my ($dig) = @_;
313	422					497	my @digs;
314	422		100			1705	while ($dig && $dig =~ /$digits_re/g) {
315	56					597	push @digs, {bnd=>$1, dig=>$2};
316							}
317	422					923	@digs;
318							}
319
320							sub atom {
321	422			422	0	523	my $self = shift;
322	422					1076	my ($mol,$bnd,$iso,$sym,$chir,$hcount,$chg,$digs,$name) = @_;
323							#{no warnings; local $" = ','; print "atom(@_)\n"}
324	422					5680	my $a = $self->{add_atom}($mol,$iso,$sym,$chir,$hcount,$chg,$name);
325	422	100				1912	if($self->{stack}[-1]) {
326	358					2598	$self->{add_bond}($mol, $bnd, $self->{stack}[-1], $a);
327							}
328	422					1201	for my $dig (@$digs) {
329	56	100				192	if ($self->{digits}{$dig->{dig}}) {
330	28	0	33			116	if ($dig->{bnd} && $self->{digits}{$dig->{dig}}{bnd}
			33
331							&& $dig->{bnd} ne $self->{digits}{$dig->{dig}}{bnd}){
332	0					0	die "SMILES: Inconsistent ring closure\n";
333							}
334	28		33			240	$self->{add_bond}($mol,
335							$dig->{bnd} \|\| $self->{digits}{$dig->{dig}}{bnd},
336							$self->{digits}{$dig->{dig}}{atom}, $a);
337	28					167	delete $self->{digits}{$dig->{dig}};
338							} else {
339	28					201	$self->{digits}{$dig->{dig}} = {atom=>$a, bnd=>$dig->{bnd}};
340							}
341							}
342	422					4856	$self->{stack}[-1] = $a;
343							}
344
345							#=back
346							#
347							#=head1 CALLBACK FUNCTIONS
348							#
349							#=over
350							#
351							#=item $atom = add_atom($mol, $iso, $sym, $chir, $hcount, $chg)
352							#
353							#Called by the parser whenever an atom is found. The first parameter is the
354							#state object given to $obj->parse(). The other parameters are the isotope,
355							#symbol, chirality, hydrogen count, and charge of the atom. Only the symbol is
356							#guaranteed to be defined. Mnemonic: the parameters are given in the same order
357							#that is used in a SMILES string (such as [18OH-]). This callback is expected to
358							#return something that uniquely identifies the atom that was created (it might
359							#be a number, a string, or an object).
360							#
361							#=cut
362
363							# Default add_atom callback
364							sub add_atom {
365	378			378	0	704	my ($mol, $iso, $sym, $chir, $hcount, $chg, $name) = @_;
366	378					1518	my $atom = $mol->new_atom(symbol => ucfirst $sym, name => $name);
367	378	50				31821	$iso && $atom->attr('smiles/isotope' => $iso);
368	378	50				720	$iso && $atom->mass($iso);
369	378	50				704	$chir && $atom->attr('smiles/chirality' => $chir);
370	378	100				813	defined $hcount && $atom->hydrogens($hcount);
371	378	100				792	$chg && $atom->formal_charge($chg);
372	378	100				1168	if ($sym =~ /^[a-z]/) {
373	71					380	$atom->attr("smiles/aromatic", 1);
374							}
375	378					1594	$atom;
376							}
377
378							#=item add_bond($mol, $type, $a1, $a2)
379							#
380							#Called by the parser whenever an bond needs to be created. The first parameter
381							#is the state object given to $obj->parse(). The other parameters are the bond
382							#type and the two atoms that need to be bonded. The atoms are identified using
383							#the return values from the add_atom() callback.
384							#
385							#=back
386							#
387							#=end comment
388							#
389							#=cut
390
391							# Default add_bond callback
392							sub add_bond {
393	341			341	0	635	my ($mol, $type, $a1, $a2) = @_;
394	341	100				1006	my $order = $type_to_order{$type} or return; # don't add bonds of order 0
395	333					3488	my $bond = $mol->new_bond(type=>$type, atoms=>[$a1, $a2], order=>$order);
396	333					53981	$bond->attr("smiles/type" => $type);
397	333					4308	$bond;
398							}
399
400							sub start_branch {
401	58			58	0	98	my $self = shift;
402							#print "start_branch\n";
403	58					78	push @{$self->{stack}}, $self->{stack}[-1];
	58					599
404							}
405
406							sub end_branch {
407	58			58	0	88	my $self = shift;
408							#print "end_branch\n";
409	58					86	pop @{$self->{stack}};
	58					561
410							}
411
412							# returns the number of hydrogens for an atom, assuming it has
413							# no charge or radical (because those require an explicit H-count anyway)
414							sub calc_implicit_hydrogens {
415	369			369	0	508	my ($self, $atom) = @_;
416	8			8		59	no warnings 'uninitialized';
	8					13
	8					39603
417	369					906	my $h_count = $ORGANIC_ELEMS{$atom->symbol} - $atom->valence;
418	369	100	100			14714	if ($atom->attr("smiles/aromatic") and $atom->symbol =~ /^[CN]$/) {
419	70					1125	$h_count--;
420							}
421	369	100				4005	$h_count = 0 if $h_count < 0;
422	369					636	$h_count;
423							}
424
425							# returns the number of hydrogens that an atom should have,
426							# taking into account that it may or may not have a few hydrogens
427							# defined already. This assumes that the atom is neutral and not radical
428							sub calc_implicit_hydrogens_2 {
429	407			407	0	18441	my ($self, $atom) = @_;
430	407					1198	my $h_count = $ORGANIC_ELEMS{$atom->symbol} - $atom->valence
431							+ $atom->total_hydrogens;
432	407	100				27062	$h_count = 0 if $h_count < 0;
433	407					2562	$h_count;
434							}
435
436							sub add_implicit_hydrogens {
437	57			57	0	108	my ($self, $mol) = @_;
438	57					1504	for my $atom ($mol->atoms) {
439							#print "H=".$atom->hydrogens."\n";
440	378	100				4129	unless (defined $atom->hydrogens) {
441	369					2437	my $h_count = $self->calc_implicit_hydrogens($atom);
442	369					1017	$atom->hydrogens($h_count);
443							}
444							}
445							}
446
447							##### SMILES WRITER ########
448
449							sub write_string {
450	59			59	1	12910	my ($self, $mol_ref, %opts) = @_;
451
452	59					117	my $eol;
453							my @mols;
454	59	100				210	if ($opts{mols}) {
455	1					2	@mols = @{$opts{mols}};
	1					4
456	1					3	$eol = "\n";
457							} else {
458	58					194	@mols = $mol_ref;
459	58					129	$eol = "";
460							}
461
462	59					96	my $smiles;
463	59					159	for my $mol (@mols) {
464	62					3507	$mol = $mol->clone;
465	62					31012	$mol->collapse_hydrogens;
466	62					18876	my @atoms = $mol->atoms;
467
468	62	50				613	if (@atoms) {
469	62					100	my $i;
470	62	100				204	if ($opts{auto_number}) {
471	1					7	$_->name(++$i) for @atoms;
472	1					53	$opts{number} = 1;
473							}
474	62	100				267	if ($opts{unique}) {
475	15	50				56	unless ($atoms[0]->attr("canon/class")) {
476	15					2903	require Chemistry::Canonicalize;
477	15					128938	Chemistry::Canonicalize::canonicalize($mol);
478							}
479	15					190286	$opts{aromatic} = 1; # all unique smiles have to be aromatic
480	171					1839	@atoms = sort {
481	15					86	$a->attr("canon/class") <=> $b->attr("canon/class")
482							} @atoms;
483							}
484
485	62	100				329	if ($opts{aromatic}) {
486	61					11510	require Chemistry::Ring;
487	61					39729	Chemistry::Ring::aromatize_mol($mol);
488							}
489
490	62					112296	my $visited = {};
491	62					119	my @s;
492	62					147	for my $atom (@atoms) {
493	414	100				5342	next if $visited->{$atom};
494	70					719	my $ring_atoms = {};
495
496							# first pass to find and number the ring bonds
497	70					413	$self->find_ring_bonds($mol, \%opts, $atom, undef, {}, $ring_atoms);
498
499							# second pass to actually generate the SMILES string
500	70					1767	push @s, $self->branch($mol, \%opts, $atom, undef, $visited, $ring_atoms);
501							}
502	62					744	$smiles .= join '.', @s;
503							}
504
505	62	100				200	if ($opts{name}) {
506	8					36	$smiles .= "\t" . $mol->name;
507							}
508	62					299	$smiles .= $eol;
509							}
510	59					3764	return $smiles;
511							}
512
513							sub find_ring_bonds {
514	414			414	0	989	my ($self, $mol, $opts, $atom, $from_bond, $visited, $ring_atoms) = @_;
515
516	414					1022	$visited->{$atom} = 1;
517	414					4051	for my $bn ($self->sorted_bonds_neighbors($atom, $opts)) {
518	736					3993	my $nei = $bn->{to};
519	736					932	my $bond = $bn->{bond};
520	736	100				2123	next if $visited->{$bond};
521	368					3336	$visited->{$bond} = 1;
522	368	100				3870	if ($visited->{$nei}) { # closed ring
523							#print "closing ring\n";
524	24					236	$ring_atoms->{$nei}++;
525							} else {
526	344					3097	$self->find_ring_bonds($mol, $opts, $nei, $bond, $visited, $ring_atoms);
527							}
528							}
529							}
530
531							sub branch {
532	414			414	0	816	my ($self, $mol, $opts, $atom, $from_bond, $visited, $digits) = @_;
533
534	414					597	my $prev_branch = "";
535	414					428	my $smiles;
536	414					1092	$smiles .= $self->bond_symbol($from_bond, $opts);
537							#$digits->{count}++;
538	414					3425	$smiles .= $self->format_atom($atom, $opts);
539	414	100				1833	if ($digits->{$atom}) { # opening a ring
540	24					202	my @d;
541	24					74	for (1 .. $digits->{$atom}) {
542	24					5860	push @d, $self->next_digit($digits);
543							}
544	24					98	$digits->{$atom} = \@d;
545	24	50				255	$smiles .= join "", map { $_ < 10 ? $_ : "%$_"} @d;
	24					148
546							}
547
548	414					3718	$visited->{$atom} = 1;
549	414					3704	my @bns = $self->sorted_bonds_neighbors($atom, $opts);
550
551	414					840	for my $bn (@bns) {
552	736					3357	my $nei = $bn->{to};
553	736					949	my $bond = $bn->{bond};
554	736	100				1728	next if $visited->{$bond};
555	392	100				3601	if ($visited->{$nei}) { # closed a ring
556	24					181	my $digit = shift @{$digits->{$nei}};
	24					61
557	24					310	$smiles .= $self->bond_symbol($bond, $opts);
558	24	50				244	$smiles .= $digit < 10 ? $digit : "%$digit";
559	24					64	$digits->{used_digits}[$digit] = 0; # free for future use
560	24					68	$visited->{$bond} = 1;
561							}
562							}
563
564	414					4477	for my $bn (@bns) {
565	736					2702	my $nei = $bn->{to};
566	736					1573	my $bond = $bn->{bond};
567	736	100				1650	next if $visited->{$bond};
568	344					2968	$visited->{$bond} = 1;
569	344	50				4079	unless ($visited->{$nei}) {
570	344					3931	my $branch = $self->branch($mol, $opts, $nei, $bond, $visited, $digits);
571	344	100				848	if ($prev_branch) {
572	54					106	$smiles .= "($prev_branch)";
573							}
574	344					871	$prev_branch = $branch;
575							}
576							}
577	414					3454	$smiles .= "$prev_branch";
578	414					1506	$smiles;
579							}
580
581							sub next_digit {
582	24			24	0	83	my ($self, $digits) = @_;
583	24					122	for (my $i = 1; $i < 100; $i++) {
584	26	100				123	unless ($digits->{used_digits}[$i]) {
585	24					59	$digits->{used_digits}[$i] = 1; # mark as used
586	24					97	return $i;
587							}
588							}
589	0					0	die "no more available smiles digits!"; # shouldn't happen
590							}
591
592							sub sorted_bonds_neighbors {
593	828			828	0	1693	my ($self, $atom, $opts) = @_;
594	828					2089	my @bn = $atom->bonds_neighbors;
595	828	100				13583	if ($opts->{unique}) {
596	160					1425	@bn = sort {
597	190					454	$a->{to}->attr("canon/class") <=> $b->{to}->attr("canon/class")
598							} @bn;
599							}
600	828					3598	@bn;
601							}
602
603							my %ORDER_TO_TYPE = (
604							2 => '=', 1 => '', 3 => '#',
605							);
606
607							sub bond_symbol {
608	438			438	0	644	my ($self, $bond, $opts) = @_;
609	438	100				1224	return '' unless $bond;
610	368	100	100			3055	return '' if $opts->{aromatic} && $bond->aromatic;
611	273					2442	return $ORDER_TO_TYPE{$bond->order};
612							}
613
614							sub format_atom {
615	414			414	0	639	my ($self, $atom, $opts) = @_;
616
617	414					1071	my $symbol = $atom->symbol;
618	414	100	100			3751	$symbol = lc $symbol if $opts->{aromatic} && $atom->aromatic;
619	414					2873	my $s = $symbol;
620
621							# unless atom is "simple"...
622	414	100	100			1125	if (!$ORGANIC_ELEMS{$atom->symbol} \|\| $atom->formal_charge
			66
			100
			66
623							\|\| $atom->total_hydrogens != $self->calc_implicit_hydrogens_2($atom)
624							\|\| ($opts->{number} && defined $atom->name)
625							) {
626							# "complex atom"; bracketed
627	15					131	my $h_count = $atom->hydrogens;
628	15		100			88	my $charge = $atom->formal_charge \|\| '';
629	15		50			130	my $iso = $atom->attr("smiles/isotope") \|\| '';
630	15					147	my $number = '';
631
632	15	50	66			73	if ($charge and abs($charge) > 1) {
		100
633	0					0	$charge = sprintf("%+d", $charge);
634							} elsif ($charge) {
635	1	50				8	$charge = $charge > 0 ? '+' : '-';
636							}
637
638	15	100				42	$h_count = $h_count ? ($h_count > 1 ? "H$h_count" : 'H') : '';
		100
639
640	15	100	66			64	$number = ':' . $atom->name if $opts->{number} and defined $atom->name;
641
642	15					157	$s = "[$iso$symbol$h_count$charge$number]";
643							}
644	414					1216	$s;
645							}
646
647
648							1;
649
650							=head1 CAVEATS
651
652							Stereochemistry is not supported! Stereochemical descriptors such as @, @@, /,
653							and \ will be silently ignored on input, and will certainly not be produced on
654							output.
655
656							Reading branches that start before an atom, such as (OC)C, which should be
657							equivalent to C(OC) and COC, according to some variants of the SMILES
658							specification. Many other tools don't implement this rule either.
659
660							The kekulize option works by increasing the bond orders of atoms that don't
661							have their usual valences satisfied. This may cause problems if you have atoms
662							with explicitly low hydrogen counts.
663
664							=head1 VERSION
665
666							0.47
667
668							=head1 SEE ALSO
669
670							L, L
671
672							The SMILES Home Page at http://www.daylight.com/dayhtml/smiles/
673
674							The Daylight Theory Manual at
675							http://www.daylight.com/dayhtml/doc/theory/theory.smiles.html
676
677							The PerlMol website L
678
679							=head1 AUTHOR
680
681							Ivan Tubert-Brohman Eitub@cpan.orgE
682
683							=head1 COPYRIGHT
684
685							Copyright (c) 2009 Ivan Tubert-Brohman. All rights reserved. This program is
686							free software; you can redistribute it and/or modify it under the same terms as
687							Perl itself.
688
689							=cut
690