File Coverage

blib/lib/Chemistry/OpenSMILES.pm
Criterion Covered Total %
statement 140 142 98.5
branch 62 66 93.9
condition 32 39 82.0
subroutine 26 27 96.3
pod 0 12 0.0
total 260 286 90.9


line stmt bran cond sub pod time code
1             package Chemistry::OpenSMILES;
2              
3 23     23   4201 use strict;
  23         107  
  23         741  
4 23     23   126 use warnings;
  23         55  
  23         531  
5 23     23   488 use 5.0100;
  23         79  
6              
7             # ABSTRACT: OpenSMILES format reader and writer
8             our $VERSION = '0.8.6'; # VERSION
9              
10             require Exporter;
11             our @ISA = qw( Exporter );
12             our @EXPORT_OK = qw(
13             %bond_order_to_symbol
14             %bond_symbol_to_order
15             clean_chiral_centers
16             is_aromatic
17             is_chiral
18             is_cis_trans_bond
19             is_double_bond
20             is_ring_atom
21             is_ring_bond
22             is_single_bond
23             is_triple_bond
24             mirror
25             %normal_valence
26             toggle_cistrans
27             );
28              
29 23     23   11387 use Graph::Traversal::BFS;
  23         82237  
  23         1073  
30 23     23   184 use List::Util qw( any none );
  23         51  
  23         58055  
31              
32             sub is_chiral($);
33             sub is_chiral_tetrahedral($);
34             sub mirror($);
35             sub toggle_cistrans($);
36              
37             our %normal_valence = (
38             B => [ 3 ],
39             C => [ 4 ],
40             N => [ 3, 5 ],
41             O => [ 2 ],
42             P => [ 3, 5 ],
43             S => [ 2, 4, 6 ],
44             F => [ 1 ],
45             Cl => [ 1 ],
46             Br => [ 1 ],
47             I => [ 1 ],
48             c => [ 3 ], # Not from OpenSMILES specification
49             );
50              
51             our %bond_order_to_symbol = (
52             1 => '-',
53             1.5 => ':',
54             2 => '=',
55             3 => '#',
56             4 => '$',
57             );
58              
59             our %bond_symbol_to_order = (
60             '-' => 1,
61             ':' => 1.5,
62             '=' => 2,
63             '#' => 3,
64             '$' => 4,
65             );
66              
67             # Removes chiral setting from tetrahedral chiral centers with less than
68             # four distinct neighbours. Only tetrahedral chiral centers with four atoms
69             # are affected, thus three-atom centers (implying lone pairs) are left
70             # untouched. Returns the affected atoms.
71             #
72             # TODO: check other chiral centers
73             sub clean_chiral_centers($$)
74             {
75 7     7 0 5488 my( $moiety, $color_sub ) = @_;
76              
77 7         13 my @affected;
78 7         30 for my $atom ($moiety->vertices) {
79 65 100       284 next unless is_chiral_tetrahedral( $atom );
80             # Anomers must not loose chirality settings
81 7 100       66 next if is_ring_atom( $moiety, $atom, scalar $moiety->edges );
82              
83 5 100       1135 my $hcount = exists $atom->{hcount} ? $atom->{hcount} : 0;
84 5 50       15 next if $moiety->degree($atom) + $hcount != 4;
85              
86 5         2685 my %colors = map { ($color_sub->( $_ ) => 1) }
  20         578  
87             $moiety->neighbours($atom),
88             ( { symbol => 'H' } ) x $hcount;
89 5 100       42 next if scalar keys %colors == 4;
90 3         9 delete $atom->{chirality};
91 3         26 push @affected, $atom;
92             }
93 7         29 return @affected;
94             }
95              
96             sub is_aromatic($)
97             {
98 987     987 0 2024 my( $atom ) = @_;
99 987         5057 return $atom->{symbol} ne ucfirst $atom->{symbol};
100             }
101              
102             sub is_chiral($)
103             {
104 3664     3664 0 6384 my( $what ) = @_;
105 3664 100       7316 if( ref $what eq 'HASH' ) { # Single atom
106 3659         11491 return exists $what->{chirality};
107             } else { # Graph representing moiety
108 5     16   25 return any { is_chiral( $_ ) } $what->vertices;
  16         129  
109             }
110             }
111              
112             sub is_chiral_tetrahedral($)
113             {
114 266     266 0 484 my( $what ) = @_;
115 266 100       610 if( ref $what eq 'HASH' ) { # Single atom
116             # CAVEAT: will fail for allenal configurations of @/@@ in raw mode
117 261   100     1077 return $what->{chirality} && $what->{chirality} =~ /^@@?$/
118             } else { # Graph representing moiety
119 5     19   31 return any { is_chiral_tetrahedral( $_ ) } $what->vertices;
  19         152  
120             }
121             }
122              
123             sub is_cis_trans_bond
124             {
125 61     61 0 15026 my( $moiety, $a, $b ) = @_;
126 61   100     127 return $moiety->has_edge_attribute( $a, $b, 'bond' ) &&
127             $moiety->get_edge_attribute( $a, $b, 'bond' ) =~ /^[\\\/]$/;
128             }
129              
130             sub is_double_bond
131             {
132 64     64 0 4092 my( $moiety, $a, $b ) = @_;
133 64   100     155 return $moiety->has_edge_attribute( $a, $b, 'bond' ) &&
134             $moiety->get_edge_attribute( $a, $b, 'bond' ) eq '=';
135             }
136              
137             # An atom is deemed to be a ring atom if any of its bonds is a ring bond.
138             sub is_ring_atom
139             {
140 60     60 0 10389 my( $moiety, $atom, $max_length ) = @_;
141 60 100       176 return '' unless $moiety->degree( $atom ) > 1;
142 68     68   10815 return any { is_ring_bond( $moiety, $atom, $_, $max_length ) }
143 36         20939 $moiety->neighbours( $atom );
144             }
145              
146             # A bond is deemed to be a ring bond if there is an alternative path
147             # joining its atoms not including the bond in consideration and this
148             # alternative path is not longer than 7 bonds. This is based on
149             # O'Boyle (2012) saying that Open Babel SMILES writer does not output
150             # cis/trans markers for double bonds in rings of size 8 or less due to
151             # them implicilty being cis bonds.
152             #
153             # If maximum ring size is given negative, ring size is not limited.
154             sub is_ring_bond
155             {
156 122     122 0 9740 my( $moiety, $a, $b, $max_length ) = @_;
157 122 100       282 $max_length = 7 unless $max_length;
158              
159             # A couple of shortcuts to reduce the complexity
160 122 100   240   503 return '' if any { $moiety->degree( $_ ) == 1 } ( $a, $b );
  240         67697  
161 70 100       37381 return '' if scalar( $moiety->vertices ) > scalar( $moiety->edges );
162              
163 56 100       3804 if( $max_length < 0 ) {
164             # Due to the issue in Graph, bridges() returns strings instead of real objects.
165             # Graph issue: https://github.com/graphviz-perl/Graph/issues/29
166 25         88 my %vertices_by_name = map { $_ => $_ } $moiety->vertices;
  600         1561  
167 355 100 100 355   1214 return none { ( $_->[0] == $a && $_->[1] == $b ) ||
      100        
168             ( $_->[0] == $b && $_->[1] == $a ) }
169 25         164 map { [ map { $vertices_by_name{$_} } @$_ ] } $moiety->bridges;
  375         5197  
  750         1306  
170             }
171              
172 31         121 my $copy = $moiety->copy;
173 31         163960 $copy->delete_edge( $a, $b );
174              
175 31         4850 my %distance = ( $a => 0 );
176             my $record_length = sub {
177             # Record number of bonds between $a and any other vertex
178 597     597   272297 my( $u, $v ) = @_;
179 597         1167 my @seen = grep { exists $distance{$_} } ( $u, $v );
  1194         3102  
180 597 50       1512 return '' if @seen != 1; # Can this be 0?
181              
182 597         999 my $seen = shift @seen;
183 597         967 my( $unseen ) = grep { !exists $distance{$_} } ( $u, $v );
  1194         2571  
184 597         1978 $distance{$unseen} = $distance{$seen} + 1;
185 31         151 };
186              
187             my $operations = {
188 31     31   1534 start => sub { return $a },
189 31         149 tree_edge => $record_length,
190             };
191              
192 31         177 my $traversal = Graph::Traversal::BFS->new( $copy, %$operations );
193 31         9798 $traversal->bfs;
194              
195             # $distance{$b} is the distance in bonds. In 8-member rings adjacent
196             # ring atoms have distance of 7 bonds.
197 31   66     27366 return exists $distance{$b} && $distance{$b} <= $max_length;
198             }
199              
200             sub is_single_bond
201             {
202 117     117 0 230 my( $moiety, $a, $b ) = @_;
203 117   66     273 return !$moiety->has_edge_attribute( $a, $b, 'bond' ) ||
204             $moiety->get_edge_attribute( $a, $b, 'bond' ) eq '-';
205             }
206              
207             sub is_triple_bond
208             {
209 0     0 0 0 my( $moiety, $a, $b ) = @_;
210 0   0     0 return $moiety->has_edge_attribute( $a, $b, 'bond' ) &&
211             $moiety->get_edge_attribute( $a, $b, 'bond' ) eq '#';
212             }
213              
214             sub mirror($)
215             {
216 30     30 0 50 my( $what ) = @_;
217 30 100       63 if( ref $what eq 'HASH' ) { # Single atom
218             # FIXME: currently dealing only with tetrahedral chiral centers
219 25 100       39 if( is_chiral_tetrahedral( $what ) ) {
220 2 100       9 $what->{chirality} = $what->{chirality} eq '@' ? '@@' : '@';
221             }
222             } else {
223 5         16 for ($what->vertices) {
224 25         155 mirror( $_ );
225             }
226             }
227             }
228              
229             sub toggle_cistrans($)
230             {
231 24 100   24 0 123 return $_[0] eq '/' ? '\\' : '/';
232             }
233              
234             # CAVEAT: requires output from non-raw parsing due issue similar to GH#2
235             sub _validate($@)
236             {
237 13     13   2810 my( $moiety, $color_sub ) = @_;
238              
239 13         46 for my $atom (sort { $a->{number} <=> $b->{number} } $moiety->vertices) {
  244         726  
240             # TODO: AL chiral centers also have to be checked
241 141 100       37964 if( is_chiral_tetrahedral( $atom ) ) {
242 8 100 33     26 if( $moiety->degree($atom) < 3 ) {
    50          
243             # FIXME: there should be a strict mode to forbid lone pairs
244             # FIXME: tetrahedral allenes are false-positives
245             warn sprintf 'chiral center %s(%d) has %d bonds while ' .
246             'at least 3 is required' . "\n",
247             $atom->{symbol},
248             $atom->{number},
249 1         234 $moiety->degree($atom);
250             } elsif( $moiety->degree($atom) == 4 && $color_sub ) {
251 7         8857 my %colors = map { ($color_sub->( $_ ) => 1) }
  28         776  
252             $moiety->neighbours($atom);
253 7 100 100     89 if( scalar keys %colors != 4 &&
254             !is_ring_atom( $moiety, $atom, scalar $moiety->edges ) ) {
255             warn sprintf 'tetrahedral chiral setting for %s(%d) ' .
256             'is not needed as not all 4 neighbours ' .
257             'are distinct' . "\n",
258             $atom->{symbol},
259 1         85 $atom->{number};
260             }
261             }
262             }
263              
264             # Warn about unmarked tetrahedral chiral centers
265 141 100 100     610 if( !is_chiral( $atom ) && $moiety->degree( $atom ) == 4 ) {
266 22         14484 my $color_sub_local = $color_sub;
267 22 100       59 if( !$color_sub_local ) {
268 5     20   47 $color_sub_local = sub { return $_[0]->{symbol} };
  20         60  
269             }
270 22         91 my %colors = map { ($color_sub_local->( $_ ) => 1) }
  88         2477  
271             $moiety->neighbours($atom);
272 22 100       204 if( scalar keys %colors == 4 ) {
273             warn sprintf 'atom %s(%d) has 4 distinct neighbours, ' .
274             'but does not have a chiral setting' . "\n",
275             $atom->{symbol},
276 3         52 $atom->{number};
277             }
278             }
279             }
280              
281             # FIXME: establish deterministic order
282 13         4140 for my $bond ($moiety->edges) {
283 133         23091 my( $A, $B ) = sort { $a->{number} <=> $b->{number} } @$bond;
  133         410  
284 133 100       364 if( $A eq $B ) {
285             warn sprintf 'atom %s(%d) has bond to itself' . "\n",
286             $A->{symbol},
287 1         14 $A->{number};
288             }
289              
290 133 100       323 if( $moiety->has_edge_attribute( @$bond, 'bond' ) ) {
291 6         1180 my $bond_type = $moiety->get_edge_attribute( @$bond, 'bond' );
292 6 100       1220 if( $bond_type eq '=' ) {
    50          
293             # Test cis/trans bonds
294             # FIXME: Not sure how to check which definition belongs to
295             # which of the double bonds. See COD entry 1547257.
296 1         7 for my $atom (@$bond) {
297 2         13 my %bond_types = _neighbours_per_bond_type( $moiety,
298             $atom );
299 2         10 foreach ('/', '\\') {
300 4 100 100     14 if( $bond_types{$_} && @{$bond_types{$_}} > 1 ) {
  3         23  
301             warn sprintf 'atom %s(%d) has %d bonds of type \'%s\', ' .
302             'cis/trans definitions must not conflict' . "\n",
303             $atom->{symbol},
304             $atom->{number},
305 1         6 scalar @{$bond_types{$_}},
  1         20  
306             $_;
307             }
308             }
309             }
310             } elsif( $bond_type =~ /^[\\\/]$/ ) {
311             # Test if next to a double bond.
312             # FIXME: Yields false-positives for delocalised bonds,
313             # see COD entry 1501863.
314             # FIXME: What about triple bond? See COD entry 4103591.
315 5         10 my %bond_types;
316 5         11 for my $atom (@$bond) {
317 10         23 my %bond_types_now = _neighbours_per_bond_type( $moiety,
318             $atom );
319 10         37 for my $key (keys %bond_types_now) {
320 22         27 push @{$bond_types{$key}}, @{$bond_types_now{$key}};
  22         42  
  22         56  
321             }
322             }
323 5 100       35 if( !$bond_types{'='} ) {
324             warn sprintf 'cis/trans bond is defined between atoms ' .
325             '%s(%d) and %s(%d), but neither of them ' .
326             'is attached to a double bond' . "\n",
327             $A->{symbol},
328             $A->{number},
329             $B->{symbol},
330 1         15 $B->{number};
331             }
332             }
333             }
334             }
335              
336             # TODO: SP, TB, OH chiral centers
337             }
338              
339             sub _neighbours_per_bond_type
340             {
341 12     12   26 my( $moiety, $atom ) = @_;
342 12         17 my %bond_types;
343 12         34 for my $neighbour ($moiety->neighbours($atom)) {
344 38         1302 my $bond_type;
345 38 100       91 if( $moiety->has_edge_attribute( $atom, $neighbour, 'bond' ) ) {
346 24         4653 $bond_type = $moiety->get_edge_attribute( $atom, $neighbour, 'bond' );
347             } else {
348 14         2677 $bond_type = '';
349             }
350 38 100 100     4742 if( $bond_type =~ /^[\\\/]$/ &&
351             $atom->{number} > $neighbour->{number} ) {
352 7         35 $bond_type = toggle_cistrans $bond_type;
353             }
354 38         56 push @{$bond_types{$bond_type}}, $neighbour;
  38         114  
355             }
356 12         55 return %bond_types;
357             }
358              
359             1;
360              
361             __END__