File Coverage

blib/lib/Chemistry/File/SMILES.pm
Criterion Covered Total %
statement 244 258 94.5
branch 94 112 83.9
condition 41 53 77.3
subroutine 31 34 91.1
pod 5 22 22.7
total 415 479 86.6


line stmt bran cond sub pod time code
1             package Chemistry::File::SMILES;
2              
3             $VERSION = "0.47";
4             # $Id: SMILES.pm,v 1.16 2009/05/10 20:31:08 itubert Exp $
5              
6 8     8   136676 use 5.006;
  8         30  
  8         604  
7 8     8   50 use strict;
  8         19  
  8         775  
8 8     8   46 use warnings;
  8         20  
  8         373  
9 8     8   43 no warnings 'recursion';
  8         21  
  8         584  
10 8     8   47 use base "Chemistry::File";
  8         15  
  8         41531  
11 8     8   429306 use Chemistry::Mol;
  8         725461  
  8         1251  
12 8     8   15014 use Chemistry::Bond::Find 'assign_bond_orders';
  8         248080  
  8         1029  
13 8     8   500 use List::Util 'first';
  8         17  
  8         6518  
14 8     8   54 use Carp;
  8         15  
  8         21312  
15              
16              
17             =head1 NAME
18              
19             Chemistry::File::SMILES - SMILES linear notation parser/writer
20              
21             =head1 SYNOPSYS
22              
23             #!/usr/bin/perl
24             use Chemistry::File::SMILES;
25              
26             # parse a SMILES string
27             my $s = 'C1CC1(=O)[O-]';
28             my $mol = Chemistry::Mol->parse($s, format => 'smiles');
29              
30             # print a SMILES string
31             print $mol->print(format => 'smiles');
32              
33             # print a unique (canonical) SMILES string
34             print $mol->print(format => 'smiles', unique => 1);
35              
36             # parse a SMILES file
37             my @mols = Chemistry::Mol->read("file.smi", format => 'smiles');
38              
39             # write a multiline SMILES file
40             Chemistry::Mol->write("file.smi", mols => \@mols);
41              
42              
43             =head1 DESCRIPTION
44              
45             This module parses a SMILES (Simplified Molecular Input Line Entry
46             Specification) string. This is a File I/O driver for the PerlMol project.
47             L. It registers the 'smiles' format with
48             Chemistry::Mol.
49              
50             This parser interprets anything after whitespace as the molecule's name;
51             for example, when the following SMILES string is parsed, $mol->name will be
52             set to "Methyl chloride":
53              
54             CCl Methyl chloride
55              
56             The name is not included by default on output. However, if the C option
57             is defined, the name will be included after the SMILES string, separated by a
58             tab.
59              
60             print $mol->print(format => 'smiles', name => 1);
61              
62             =head2 Multiline SMILES and SMILES files
63              
64             A file or string can contain multiple molecules, one per line.
65              
66             CCl Methyl chloride
67             CO Methanol
68              
69             Files with the extension '.smi' are assumed to have this format.
70              
71             =head2 Atom Mapping Numbers
72              
73             As an extension for reaction processing, SMILES strings may have atom mapping
74             numbers, which are introduced after a colon in a bracketed atom. For example,
75             [C:1]. The mapping number need not be unique. This module reads the mapping
76             numbers and stores them as the name of the atom ($atom->name).
77              
78             On output, atom names are not included by default. See the C and
79             C options below for ways of including them.
80              
81             head1 OPTIONS
82              
83             The following options are supported in addition to the options mentioned for
84             L, such as C, C, and C.
85              
86             =over
87              
88             =item aromatic
89              
90             On output, detect aromatic atoms and bonds by means of the Chemistry::Ring
91             module, and represent the organic aromatic atoms with lowercase symbols.
92              
93             =item unique
94              
95             When used on output, canonicalize the structure if it hasn't been canonicalized
96             already and generate a unique SMILES string. This option implies "aromatic".
97              
98             =item number
99              
100             For atoms that have a defined name, print the name as the "atom number". For
101             example, if an ethanol molecule has the name "42" for the oxygen atom and the
102             other atoms have undefined names, the output would be:
103              
104             CC[OH:42]
105              
106             =item auto_number
107              
108             When used on output, number all the atoms explicitly and sequentially. The
109             output for ethanol would look something like this:
110              
111             [CH3:1][CH2:2][OH:3]
112              
113             =item name
114              
115             Include the molecule name on output, as described in the previous section.
116              
117             =item kekulize
118              
119             When used on input, assign single or double bond orders to "aromatic" or
120             otherwise unspecified bonds (i.e., generate the Kekule structure). If false,
121             the bond orders will remain single. This option is true by default. This uses
122             C from the L module.
123              
124             =back
125              
126             =cut
127              
128             # INITIALIZATION
129             Chemistry::Mol->register_format('smiles');
130             my $Smiles_parser = __PACKAGE__->new_parser;
131              
132             #=begin comment
133             #
134             #=over
135             #
136             #=cut
137              
138             sub file_is {
139 0     0 1 0 my $self = shift;
140 0         0 $self->name_is(@_);
141             }
142              
143             sub name_is {
144 0     0 1 0 my ($self, $name) = @_;
145 0         0 $name =~ /\.smi/;
146             }
147              
148             sub slurp_mol {
149 0     0 1 0 my ($self, $fh) = @_;
150 0         0 scalar <$fh>;
151             }
152              
153             sub read_mol {
154 111     111 1 89396 my ($self, $fh, %opts) = @_;
155 111         852 %opts = (kekulize => 1, %opts);
156 111   50     519 my $mol_class = $opts{mol_class} || "Chemistry::Mol";
157              
158 111         639 my $line = <$fh>;
159 111 100       568 return unless defined $line;
160 57         197 $line =~ tr/\r\n//d;
161 57         223 my ($smiles, $name) = split " ", $line, 2;
162              
163 57         401 my $mol = $mol_class->new;
164 57 50       1620 unless ($Smiles_parser->parse($smiles, $mol, \%opts)) {
165 0         0 warn "error parsing SMILES line '$line'\n";
166 0         0 $mol = $mol_class->new;
167             }
168 57         565 $mol->name($name);
169 57         438 $self->add_implicit_hydrogens($mol);
170 57 100       627 if ($opts{kekulize}) {
171 56         355 assign_bond_orders($mol, method => "itub", use_coords => 0,
172             scratch => 0, charges => 0);
173             }
174 57         55748 $mol;
175             }
176              
177              
178             ### The contents of the original Chemistry::Smiles module start below
179              
180             my $Symbol = qr/
181             s|p|o|n|c|b|Zr|Zn|Yb|Y|Xe|W|V|U|Tm|Tl|Ti|Th|
182             Te|Tc|Tb|Ta|Sr|Sn|Sm|Si|Sg|Se|Sc|Sb|S|Ru|Rn|Rh|Rf|Re|Rb|Ra|
183             Pu|Pt|Pr|Po|Pm|Pd|Pb|Pa|P|Os|O|Np|No|Ni|Ne|Nd|Nb|Na|N|Mt|Mt|
184             Mo|Mn|Mg|Md|Lu|Lr|Li|La|Kr|K|Ir|In|I|Hs|Hs|Ho|Hg|Hf|He|H|Ge|
185             Gd|Ga|Fr|Fm|Fe|F|Eu|Es|Er|Dy|Ds|Db|Cu|Cs|Cr|Co|Cm|Cl|Cf|Ce|
186             Cd|Ca|C|Br|Bk|Bi|Bh|Be|Ba|B|Au|At|As|Ar|Am|Al|Ag|Ac|\*|R|X
187             /x; # Order is reverse alphabetical to ensure longest match
188              
189             my $Simple_symbol = qr/Br|Cl|B|C|N|O|P|S|F|I|H|s|p|o|n|c|b/;
190              
191             my $Bond = qr/(?:[-=#:.\/\\])?/;
192             my $Simple_atom = qr/($Simple_symbol)/; #3
193             my $Complex_atom = qr/
194             (?:
195             \[ #begin atom
196             (\d*) #4 isotope
197             ($Symbol) #5 symbol
198             (\@{0,2}) #6 chirality
199             (?:(H\d*))? #7 H-count
200             (\+{2,}|-{2,}|\+\d*|-\d*)? #8 charge
201             (?::(\d+))? #9 name
202             \] #end atom
203             )
204             /x;
205              
206             my $Digits = qr/(?:($Bond)(?:\d|%\d\d))*/;
207             my $Chain = qr/
208             \G( #1
209             (?:
210             ($Bond) #2
211             (?:$Simple_atom|$Complex_atom) #3-9
212             ($Digits) #10
213             )
214             |\(
215             |\)
216             |.+
217             )
218             /x;
219              
220             my $digits_re = qr/($Bond)(\%\d\d|\d)/;
221              
222             my %type_to_order = (
223             '-' => 1,
224             '=' => 2,
225             '#' => 3,
226             '/' => 1,
227             '\\' => 1,
228             '' => 1, # not strictly true
229             '.' => 0,
230             );
231              
232             my %ORGANIC_ELEMS = (
233             Br => 1, Cl => 1, B => 3, C => 4, N => 3, O => 2, P => 3, S => 2,
234             F => 1, I => 1, s => 1, p => 1, o => 1, n => 1, c => 1, b => 1,
235             );
236              
237             #=item Chemistry::Smiles->new([add_atom => \&sub1, add_bond => \&sub2])
238             #
239             #Create a SMILES parser. If the add_atom and add_bond subroutine references
240             #are given, they will be called whenever an atom or a bond needs to be added
241             #to the molecule. If they are not specified, default methods, which
242             #create a Chemistry::Mol object, will be used.
243             #
244             #=cut
245              
246             sub new_parser {
247 9     9 0 160 my $class = shift;
248 9         31 my %opts = @_;
249 9   100     165 my $self = bless {
      100        
250             add_atom => $opts{add_atom} || \&add_atom,
251             add_bond => $opts{add_bond} || \&add_bond,
252             }, $class;
253             }
254              
255             #=item $obj->parse($string, $mol)
256             #
257             #Parse a Smiles $string. $mol is a "molecule state object". It can be anything;
258             #the parser doesn't do anything with it except sending it as the first parameter
259             #to the callback functions. If callback functions were not provided when
260             #constructing the parser object, $mol must be a Chemistry::Mol object, because
261             #that's what the default callback functions require.
262             #
263             #=cut
264              
265             sub parse {
266 64     64 0 1106 my ($self, $s, $mol, $opts) = @_;
267 64         362 $self->{stack} = [ undef ];
268 64         187 $self->{digits} = {};
269              
270 64         131 eval {
271 64         2247 while ($s =~ /$Chain/g) {
272             #my @a = ($1, $2, $3, $4, $5, $6, $7, $8);
273             #print Dumper(\@a);
274 538         3429 my ($all, $bnd, $sym, $iso, $sym2, $chir, $hcnt, $chg, $name, $dig)
275             = ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);
276 538 100       1816 if ($all eq '(') {
    100          
    100          
    50          
277 58         167 $self->start_branch();
278             } elsif ($all eq ')') {
279 58         169 $self->end_branch();
280             } elsif ($sym) { # Simple atom
281 8     8   415 no warnings;
  8         17  
  8         810  
282 409         1078 my @digs = parse_digits($dig);
283 409         1263 $self->atom($mol, $bnd, '', $sym, '', undef, '', \@digs);
284             } elsif ($sym2) { # Complex atom
285 8     8   43 no warnings;
  8         17  
  8         12432  
286 13         38 my @digs = parse_digits($dig);
287 13 100       38 if ($hcnt eq 'H') {
288 3         8 $hcnt = 1;
289             } else {
290 10         24 $hcnt =~ s/H//;
291             }
292 13 50       43 unless ($chg =~ /\d/) {
293 13 100       44 $chg = ($chg =~ /-/) ? -length($chg) : length($chg);
294             }
295 13   100     125 $self->atom($mol, $bnd, $iso, $sym2, $chir, $hcnt || 0,
      100        
296             $chg || 0, \@digs, $name);
297             } else {
298 0         0 die "SMILES ERROR: '$all in $s'\n";
299             }
300             }
301             };
302             # clean up to avoid memory leak
303 64         194 $self->{stack} = undef;
304 64 50       191 if ($@) {
305 0 0       0 croak $@ if $opts->{fatal};
306 0         0 return;
307             }
308 64         288 $mol;
309             }
310              
311             sub parse_digits {
312 422     422 0 592 my ($dig) = @_;
313 422         497 my @digs;
314 422   100     1705 while ($dig && $dig =~ /$digits_re/g) {
315 56         597 push @digs, {bnd=>$1, dig=>$2};
316             }
317 422         923 @digs;
318             }
319              
320             sub atom {
321 422     422 0 523 my $self = shift;
322 422         1076 my ($mol,$bnd,$iso,$sym,$chir,$hcount,$chg,$digs,$name) = @_;
323             #{no warnings; local $" = ','; print "atom(@_)\n"}
324 422         5680 my $a = $self->{add_atom}($mol,$iso,$sym,$chir,$hcount,$chg,$name);
325 422 100       1912 if($self->{stack}[-1]) {
326 358         2598 $self->{add_bond}($mol, $bnd, $self->{stack}[-1], $a);
327             }
328 422         1201 for my $dig (@$digs) {
329 56 100       192 if ($self->{digits}{$dig->{dig}}) {
330 28 0 33     116 if ($dig->{bnd} && $self->{digits}{$dig->{dig}}{bnd}
      33        
331             && $dig->{bnd} ne $self->{digits}{$dig->{dig}}{bnd}){
332 0         0 die "SMILES: Inconsistent ring closure\n";
333             }
334 28   33     240 $self->{add_bond}($mol,
335             $dig->{bnd} || $self->{digits}{$dig->{dig}}{bnd},
336             $self->{digits}{$dig->{dig}}{atom}, $a);
337 28         167 delete $self->{digits}{$dig->{dig}};
338             } else {
339 28         201 $self->{digits}{$dig->{dig}} = {atom=>$a, bnd=>$dig->{bnd}};
340             }
341             }
342 422         4856 $self->{stack}[-1] = $a;
343             }
344              
345             #=back
346             #
347             #=head1 CALLBACK FUNCTIONS
348             #
349             #=over
350             #
351             #=item $atom = add_atom($mol, $iso, $sym, $chir, $hcount, $chg)
352             #
353             #Called by the parser whenever an atom is found. The first parameter is the
354             #state object given to $obj->parse(). The other parameters are the isotope,
355             #symbol, chirality, hydrogen count, and charge of the atom. Only the symbol is
356             #guaranteed to be defined. Mnemonic: the parameters are given in the same order
357             #that is used in a SMILES string (such as [18OH-]). This callback is expected to
358             #return something that uniquely identifies the atom that was created (it might
359             #be a number, a string, or an object).
360             #
361             #=cut
362              
363             # Default add_atom callback
364             sub add_atom {
365 378     378 0 704 my ($mol, $iso, $sym, $chir, $hcount, $chg, $name) = @_;
366 378         1518 my $atom = $mol->new_atom(symbol => ucfirst $sym, name => $name);
367 378 50       31821 $iso && $atom->attr('smiles/isotope' => $iso);
368 378 50       720 $iso && $atom->mass($iso);
369 378 50       704 $chir && $atom->attr('smiles/chirality' => $chir);
370 378 100       813 defined $hcount && $atom->hydrogens($hcount);
371 378 100       792 $chg && $atom->formal_charge($chg);
372 378 100       1168 if ($sym =~ /^[a-z]/) {
373 71         380 $atom->attr("smiles/aromatic", 1);
374             }
375 378         1594 $atom;
376             }
377              
378             #=item add_bond($mol, $type, $a1, $a2)
379             #
380             #Called by the parser whenever an bond needs to be created. The first parameter
381             #is the state object given to $obj->parse(). The other parameters are the bond
382             #type and the two atoms that need to be bonded. The atoms are identified using
383             #the return values from the add_atom() callback.
384             #
385             #=back
386             #
387             #=end comment
388             #
389             #=cut
390              
391             # Default add_bond callback
392             sub add_bond {
393 341     341 0 635 my ($mol, $type, $a1, $a2) = @_;
394 341 100       1006 my $order = $type_to_order{$type} or return; # don't add bonds of order 0
395 333         3488 my $bond = $mol->new_bond(type=>$type, atoms=>[$a1, $a2], order=>$order);
396 333         53981 $bond->attr("smiles/type" => $type);
397 333         4308 $bond;
398             }
399              
400             sub start_branch {
401 58     58 0 98 my $self = shift;
402             #print "start_branch\n";
403 58         78 push @{$self->{stack}}, $self->{stack}[-1];
  58         599  
404             }
405              
406             sub end_branch {
407 58     58 0 88 my $self = shift;
408             #print "end_branch\n";
409 58         86 pop @{$self->{stack}};
  58         561  
410             }
411            
412             # returns the number of hydrogens for an atom, assuming it has
413             # no charge or radical (because those require an explicit H-count anyway)
414             sub calc_implicit_hydrogens {
415 369     369 0 508 my ($self, $atom) = @_;
416 8     8   59 no warnings 'uninitialized';
  8         13  
  8         39603  
417 369         906 my $h_count = $ORGANIC_ELEMS{$atom->symbol} - $atom->valence;
418 369 100 100     14714 if ($atom->attr("smiles/aromatic") and $atom->symbol =~ /^[CN]$/) {
419 70         1125 $h_count--;
420             }
421 369 100       4005 $h_count = 0 if $h_count < 0;
422 369         636 $h_count;
423             }
424              
425             # returns the number of hydrogens that an atom should have,
426             # taking into account that it may or may not have a few hydrogens
427             # defined already. This assumes that the atom is neutral and not radical
428             sub calc_implicit_hydrogens_2 {
429 407     407 0 18441 my ($self, $atom) = @_;
430 407         1198 my $h_count = $ORGANIC_ELEMS{$atom->symbol} - $atom->valence
431             + $atom->total_hydrogens;
432 407 100       27062 $h_count = 0 if $h_count < 0;
433 407         2562 $h_count;
434             }
435              
436             sub add_implicit_hydrogens {
437 57     57 0 108 my ($self, $mol) = @_;
438 57         1504 for my $atom ($mol->atoms) {
439             #print "H=".$atom->hydrogens."\n";
440 378 100       4129 unless (defined $atom->hydrogens) {
441 369         2437 my $h_count = $self->calc_implicit_hydrogens($atom);
442 369         1017 $atom->hydrogens($h_count);
443             }
444             }
445             }
446              
447             ##### SMILES WRITER ########
448              
449             sub write_string {
450 59     59 1 12910 my ($self, $mol_ref, %opts) = @_;
451              
452 59         117 my $eol;
453             my @mols;
454 59 100       210 if ($opts{mols}) {
455 1         2 @mols = @{$opts{mols}};
  1         4  
456 1         3 $eol = "\n";
457             } else {
458 58         194 @mols = $mol_ref;
459 58         129 $eol = "";
460             }
461              
462 59         96 my $smiles;
463 59         159 for my $mol (@mols) {
464 62         3507 $mol = $mol->clone;
465 62         31012 $mol->collapse_hydrogens;
466 62         18876 my @atoms = $mol->atoms;
467              
468 62 50       613 if (@atoms) {
469 62         100 my $i;
470 62 100       204 if ($opts{auto_number}) {
471 1         7 $_->name(++$i) for @atoms;
472 1         53 $opts{number} = 1;
473             }
474 62 100       267 if ($opts{unique}) {
475 15 50       56 unless ($atoms[0]->attr("canon/class")) {
476 15         2903 require Chemistry::Canonicalize;
477 15         128938 Chemistry::Canonicalize::canonicalize($mol);
478             }
479 15         190286 $opts{aromatic} = 1; # all unique smiles have to be aromatic
480 171         1839 @atoms = sort {
481 15         86 $a->attr("canon/class") <=> $b->attr("canon/class")
482             } @atoms;
483             }
484              
485 62 100       329 if ($opts{aromatic}) {
486 61         11510 require Chemistry::Ring;
487 61         39729 Chemistry::Ring::aromatize_mol($mol);
488             }
489              
490 62         112296 my $visited = {};
491 62         119 my @s;
492 62         147 for my $atom (@atoms) {
493 414 100       5342 next if $visited->{$atom};
494 70         719 my $ring_atoms = {};
495              
496             # first pass to find and number the ring bonds
497 70         413 $self->find_ring_bonds($mol, \%opts, $atom, undef, {}, $ring_atoms);
498              
499             # second pass to actually generate the SMILES string
500 70         1767 push @s, $self->branch($mol, \%opts, $atom, undef, $visited, $ring_atoms);
501             }
502 62         744 $smiles .= join '.', @s;
503             }
504              
505 62 100       200 if ($opts{name}) {
506 8         36 $smiles .= "\t" . $mol->name;
507             }
508 62         299 $smiles .= $eol;
509             }
510 59         3764 return $smiles;
511             }
512              
513             sub find_ring_bonds {
514 414     414 0 989 my ($self, $mol, $opts, $atom, $from_bond, $visited, $ring_atoms) = @_;
515              
516 414         1022 $visited->{$atom} = 1;
517 414         4051 for my $bn ($self->sorted_bonds_neighbors($atom, $opts)) {
518 736         3993 my $nei = $bn->{to};
519 736         932 my $bond = $bn->{bond};
520 736 100       2123 next if $visited->{$bond};
521 368         3336 $visited->{$bond} = 1;
522 368 100       3870 if ($visited->{$nei}) { # closed ring
523             #print "closing ring\n";
524 24         236 $ring_atoms->{$nei}++;
525             } else {
526 344         3097 $self->find_ring_bonds($mol, $opts, $nei, $bond, $visited, $ring_atoms);
527             }
528             }
529             }
530              
531             sub branch {
532 414     414 0 816 my ($self, $mol, $opts, $atom, $from_bond, $visited, $digits) = @_;
533              
534 414         597 my $prev_branch = "";
535 414         428 my $smiles;
536 414         1092 $smiles .= $self->bond_symbol($from_bond, $opts);
537             #$digits->{count}++;
538 414         3425 $smiles .= $self->format_atom($atom, $opts);
539 414 100       1833 if ($digits->{$atom}) { # opening a ring
540 24         202 my @d;
541 24         74 for (1 .. $digits->{$atom}) {
542 24         5860 push @d, $self->next_digit($digits);
543             }
544 24         98 $digits->{$atom} = \@d;
545 24 50       255 $smiles .= join "", map { $_ < 10 ? $_ : "%$_"} @d;
  24         148  
546             }
547              
548 414         3718 $visited->{$atom} = 1;
549 414         3704 my @bns = $self->sorted_bonds_neighbors($atom, $opts);
550              
551 414         840 for my $bn (@bns) {
552 736         3357 my $nei = $bn->{to};
553 736         949 my $bond = $bn->{bond};
554 736 100       1728 next if $visited->{$bond};
555 392 100       3601 if ($visited->{$nei}) { # closed a ring
556 24         181 my $digit = shift @{$digits->{$nei}};
  24         61  
557 24         310 $smiles .= $self->bond_symbol($bond, $opts);
558 24 50       244 $smiles .= $digit < 10 ? $digit : "%$digit";
559 24         64 $digits->{used_digits}[$digit] = 0; # free for future use
560 24         68 $visited->{$bond} = 1;
561             }
562             }
563            
564 414         4477 for my $bn (@bns) {
565 736         2702 my $nei = $bn->{to};
566 736         1573 my $bond = $bn->{bond};
567 736 100       1650 next if $visited->{$bond};
568 344         2968 $visited->{$bond} = 1;
569 344 50       4079 unless ($visited->{$nei}) {
570 344         3931 my $branch = $self->branch($mol, $opts, $nei, $bond, $visited, $digits);
571 344 100       848 if ($prev_branch) {
572 54         106 $smiles .= "($prev_branch)";
573             }
574 344         871 $prev_branch = $branch;
575             }
576             }
577 414         3454 $smiles .= "$prev_branch";
578 414         1506 $smiles;
579             }
580              
581             sub next_digit {
582 24     24 0 83 my ($self, $digits) = @_;
583 24         122 for (my $i = 1; $i < 100; $i++) {
584 26 100       123 unless ($digits->{used_digits}[$i]) {
585 24         59 $digits->{used_digits}[$i] = 1; # mark as used
586 24         97 return $i;
587             }
588             }
589 0         0 die "no more available smiles digits!"; # shouldn't happen
590             }
591              
592             sub sorted_bonds_neighbors {
593 828     828 0 1693 my ($self, $atom, $opts) = @_;
594 828         2089 my @bn = $atom->bonds_neighbors;
595 828 100       13583 if ($opts->{unique}) {
596 160         1425 @bn = sort {
597 190         454 $a->{to}->attr("canon/class") <=> $b->{to}->attr("canon/class")
598             } @bn;
599             }
600 828         3598 @bn;
601             }
602              
603             my %ORDER_TO_TYPE = (
604             2 => '=', 1 => '', 3 => '#',
605             );
606              
607             sub bond_symbol {
608 438     438 0 644 my ($self, $bond, $opts) = @_;
609 438 100       1224 return '' unless $bond;
610 368 100 100     3055 return '' if $opts->{aromatic} && $bond->aromatic;
611 273         2442 return $ORDER_TO_TYPE{$bond->order};
612             }
613              
614             sub format_atom {
615 414     414 0 639 my ($self, $atom, $opts) = @_;
616              
617 414         1071 my $symbol = $atom->symbol;
618 414 100 100     3751 $symbol = lc $symbol if $opts->{aromatic} && $atom->aromatic;
619 414         2873 my $s = $symbol;
620              
621             # unless atom is "simple"...
622 414 100 100     1125 if (!$ORGANIC_ELEMS{$atom->symbol} || $atom->formal_charge
      66        
      100        
      66        
623             || $atom->total_hydrogens != $self->calc_implicit_hydrogens_2($atom)
624             || ($opts->{number} && defined $atom->name)
625             ) {
626             # "complex atom"; bracketed
627 15         131 my $h_count = $atom->hydrogens;
628 15   100     88 my $charge = $atom->formal_charge || '';
629 15   50     130 my $iso = $atom->attr("smiles/isotope") || '';
630 15         147 my $number = '';
631              
632 15 50 66     73 if ($charge and abs($charge) > 1) {
    100          
633 0         0 $charge = sprintf("%+d", $charge);
634             } elsif ($charge) {
635 1 50       8 $charge = $charge > 0 ? '+' : '-';
636             }
637              
638 15 100       42 $h_count = $h_count ? ($h_count > 1 ? "H$h_count" : 'H') : '';
    100          
639              
640 15 100 66     64 $number = ':' . $atom->name if $opts->{number} and defined $atom->name;
641              
642 15         157 $s = "[$iso$symbol$h_count$charge$number]";
643             }
644 414         1216 $s;
645             }
646              
647              
648             1;
649              
650             =head1 CAVEATS
651              
652             Stereochemistry is not supported! Stereochemical descriptors such as @, @@, /,
653             and \ will be silently ignored on input, and will certainly not be produced on
654             output.
655              
656             Reading branches that start before an atom, such as (OC)C, which should be
657             equivalent to C(OC) and COC, according to some variants of the SMILES
658             specification. Many other tools don't implement this rule either.
659              
660             The kekulize option works by increasing the bond orders of atoms that don't
661             have their usual valences satisfied. This may cause problems if you have atoms
662             with explicitly low hydrogen counts.
663              
664             =head1 VERSION
665              
666             0.47
667              
668             =head1 SEE ALSO
669              
670             L, L
671              
672             The SMILES Home Page at http://www.daylight.com/dayhtml/smiles/
673              
674             The Daylight Theory Manual at
675             http://www.daylight.com/dayhtml/doc/theory/theory.smiles.html
676              
677             The PerlMol website L
678              
679             =head1 AUTHOR
680              
681             Ivan Tubert-Brohman Eitub@cpan.orgE
682              
683             =head1 COPYRIGHT
684              
685             Copyright (c) 2009 Ivan Tubert-Brohman. All rights reserved. This program is
686             free software; you can redistribute it and/or modify it under the same terms as
687             Perl itself.
688              
689             =cut
690