File Coverage

blib/lib/Chemistry/OpenSMILES.pm
Criterion Covered Total %
statement 125 125 100.0
branch 49 54 90.7
condition 22 27 81.4
subroutine 22 22 100.0
pod 0 10 0.0
total 218 238 91.6


line stmt bran cond sub pod time code
1             package Chemistry::OpenSMILES;
2              
3 21     21   3157 use strict;
  21         78  
  21         591  
4 21     21   111 use warnings;
  21         55  
  21         473  
5 21     21   419 use 5.0100;
  21         82  
6              
7             # ABSTRACT: OpenSMILES format reader and writer
8             our $VERSION = '0.8.5'; # VERSION
9              
10             require Exporter;
11             our @ISA = qw( Exporter );
12             our @EXPORT_OK = qw(
13             clean_chiral_centers
14             is_aromatic
15             is_chiral
16             is_cis_trans_bond
17             is_double_bond
18             is_ring_bond
19             is_single_bond
20             mirror
21             %normal_valence
22             toggle_cistrans
23             );
24              
25 21     21   10290 use Graph::Traversal::BFS;
  21         73353  
  21         910  
26 21     21   180 use List::Util qw(any);
  21         58  
  21         42467  
27              
28             sub is_chiral($);
29             sub is_chiral_tetrahedral($);
30             sub mirror($);
31             sub toggle_cistrans($);
32              
33             our %normal_valence = (
34             B => [ 3 ],
35             C => [ 4 ],
36             N => [ 3, 5 ],
37             O => [ 2 ],
38             P => [ 3, 5 ],
39             S => [ 2, 4, 6 ],
40             F => [ 1 ],
41             Cl => [ 1 ],
42             Br => [ 1 ],
43             I => [ 1 ],
44             c => [ 3 ], # Not from OpenSMILES specification
45             );
46              
47             # Removes chiral setting from tetrahedral chiral centers with less than
48             # four distinct neighbours. Only tetrahedral chiral centers with four atoms
49             # are affected, thus three-atom centers (implying lone pairs) are left
50             # untouched. Returns the affected atoms.
51             #
52             # CAVEAT: disregards anomers
53             # TODO: check other chiral centers
54             sub clean_chiral_centers($$)
55             {
56 7     7 0 5212 my( $moiety, $color_sub ) = @_;
57              
58 7         14 my @affected;
59 7         24 for my $atom ($moiety->vertices) {
60 65 100       268 next unless is_chiral_tetrahedral( $atom );
61              
62 7 100       25 my $hcount = exists $atom->{hcount} ? $atom->{hcount} : 0;
63 7 50       53 next if $moiety->degree($atom) + $hcount != 4;
64              
65 7         4197 my %colors = map { ($color_sub->( $_ ) => 1) }
  28         776  
66             $moiety->neighbours($atom),
67             ( { symbol => 'H' } ) x $hcount;
68 7 100       63 next if scalar keys %colors == 4;
69 5         13 delete $atom->{chirality};
70 5         18 push @affected, $atom;
71             }
72 7         26 return @affected;
73             }
74              
75             sub is_aromatic($)
76             {
77 976     976 0 1969 my( $atom ) = @_;
78 976         5010 return $atom->{symbol} ne ucfirst $atom->{symbol};
79             }
80              
81             sub is_chiral($)
82             {
83 3594     3594 0 6232 my( $what ) = @_;
84 3594 100       6931 if( ref $what eq 'HASH' ) { # Single atom
85 3589         11388 return exists $what->{chirality};
86             } else { # Graph representing moiety
87 5     16   26 return any { is_chiral( $_ ) } $what->vertices;
  16         134  
88             }
89             }
90              
91             sub is_chiral_tetrahedral($)
92             {
93 266     266 0 438 my( $what ) = @_;
94 266 100       545 if( ref $what eq 'HASH' ) { # Single atom
95             # CAVEAT: will fail for allenal configurations of @/@@ in raw mode
96 261   100     916 return $what->{chirality} && $what->{chirality} =~ /^@@?$/
97             } else { # Graph representing moiety
98 5     19   29 return any { is_chiral_tetrahedral( $_ ) } $what->vertices;
  19         146  
99             }
100             }
101              
102             sub is_cis_trans_bond
103             {
104 61     61 0 14120 my( $moiety, $a, $b ) = @_;
105 61   100     131 return $moiety->has_edge_attribute( $a, $b, 'bond' ) &&
106             $moiety->get_edge_attribute( $a, $b, 'bond' ) =~ /^[\\\/]$/;
107             }
108              
109             sub is_double_bond
110             {
111 60     60 0 2524 my( $moiety, $a, $b ) = @_;
112 60   100     135 return $moiety->has_edge_attribute( $a, $b, 'bond' ) &&
113             $moiety->get_edge_attribute( $a, $b, 'bond' ) eq '=';
114             }
115              
116             # A bond is deemed to be a ring bond if there is an alternative path
117             # joining its atoms not including the bond in consideration and this
118             # alternative path is not longer than 7 bonds. This is based on
119             # O'Boyle (2012) saying that Open Babel SMILES writer does not output
120             # cis/trans markers for double bonds in rings of size 8 or less due to
121             # them implicilty being cis bonds.
122             sub is_ring_bond
123             {
124 4     4 0 1543 my( $moiety, $a, $b, $max_length ) = @_;
125              
126 4 50       12 $max_length = 7 unless $max_length;
127              
128 4         11 my $copy = $moiety->copy;
129 4         8198 $copy->delete_edge( $a, $b );
130              
131 4         674 my %distance = ( $a => 0 );
132             my $record_length = sub {
133             # Record number of bonds between $a and any other vertex
134 12     12   5064 my( $u, $v ) = @_;
135 12         25 my @seen = grep { exists $distance{$_} } ( $u, $v );
  24         63  
136 12 50       39 return if @seen != 1; # Can this be 0?
137              
138 12         29 my $seen = shift @seen;
139 12         23 my( $unseen ) = grep { !exists $distance{$_} } ( $u, $v );
  24         56  
140 12         41 $distance{$unseen} = $distance{$seen} + 1;
141 4         23 };
142              
143             my $operations = {
144 4     4   192 start => sub { return $a },
145 4         19 tree_edge => $record_length,
146             };
147              
148 4         32 my $traversal = Graph::Traversal::BFS->new( $copy, %$operations );
149 4         1398 $traversal->bfs;
150              
151             # $distance{$b} is the distance in bonds. In 8-member rings adjacent
152             # ring atoms have distance of 7 bonds.
153 4   33     2222 return exists $distance{$b} && $distance{$b} <= $max_length;
154             }
155              
156             sub is_single_bond
157             {
158 117     117 0 252 my( $moiety, $a, $b ) = @_;
159 117   66     258 return !$moiety->has_edge_attribute( $a, $b, 'bond' ) ||
160             $moiety->get_edge_attribute( $a, $b, 'bond' ) eq '-';
161             }
162              
163             sub mirror($)
164             {
165 30     30 0 52 my( $what ) = @_;
166 30 100       53 if( ref $what eq 'HASH' ) { # Single atom
167             # FIXME: currently dealing only with tetrahedral chiral centers
168 25 100       46 if( is_chiral_tetrahedral( $what ) ) {
169 2 100       10 $what->{chirality} = $what->{chirality} eq '@' ? '@@' : '@';
170             }
171             } else {
172 5         16 for ($what->vertices) {
173 25         148 mirror( $_ );
174             }
175             }
176             }
177              
178             sub toggle_cistrans($)
179             {
180 24 100   24 0 107 return $_[0] eq '/' ? '\\' : '/';
181             }
182              
183             # CAVEAT: requires output from non-raw parsing due issue similar to GH#2
184             sub _validate($@)
185             {
186 13     13   2301 my( $moiety, $color_sub ) = @_;
187              
188 13         39 for my $atom (sort { $a->{number} <=> $b->{number} } $moiety->vertices) {
  244         718  
189             # TODO: AL chiral centers also have to be checked
190 141 100       37529 if( is_chiral_tetrahedral( $atom ) ) {
191 6 100 33     18 if( $moiety->degree($atom) < 3 ) {
    50          
192             # FIXME: there should be a strict mode to forbid lone pairs
193             # FIXME: tetrahedral allenes are false-positives
194             warn sprintf 'chiral center %s(%d) has %d bonds while ' .
195             'at least 3 is required' . "\n",
196             $atom->{symbol},
197             $atom->{number},
198 1         234 $moiety->degree($atom);
199             } elsif( $moiety->degree($atom) == 4 && $color_sub ) {
200 5         6084 my %colors = map { ($color_sub->( $_ ) => 1) }
  20         546  
201             $moiety->neighbours($atom);
202 5 100       43 if( scalar keys %colors != 4 ) {
203             # FIXME: anomers are false-positives, see COD entry
204             # 7111036
205             warn sprintf 'tetrahedral chiral setting for %s(%d) ' .
206             'is not needed as not all 4 neighbours ' .
207             'are distinct' . "\n",
208             $atom->{symbol},
209 3         29 $atom->{number};
210             }
211             }
212             }
213              
214             # Warn about unmarked tetrahedral chiral centers
215 141 100 100     524 if( !is_chiral( $atom ) && $moiety->degree( $atom ) == 4 ) {
216 24         15075 my $color_sub_local = $color_sub;
217 24 100       57 if( !$color_sub_local ) {
218 5     20   21 $color_sub_local = sub { return $_[0]->{symbol} };
  20         69  
219             }
220 24         98 my %colors = map { ($color_sub_local->( $_ ) => 1) }
  96         2650  
221             $moiety->neighbours($atom);
222 24 100       182 if( scalar keys %colors == 4 ) {
223             warn sprintf 'atom %s(%d) has 4 distinct neighbours, ' .
224             'but does not have a chiral setting' . "\n",
225             $atom->{symbol},
226 3         59 $atom->{number};
227             }
228             }
229             }
230              
231             # FIXME: establish deterministic order
232 13         4073 for my $bond ($moiety->edges) {
233 133         22619 my( $A, $B ) = sort { $a->{number} <=> $b->{number} } @$bond;
  133         379  
234 133 100       316 if( $A eq $B ) {
235             warn sprintf 'atom %s(%d) has bond to itself' . "\n",
236             $A->{symbol},
237 1         11 $A->{number};
238             }
239              
240 133 100       303 if( $moiety->has_edge_attribute( @$bond, 'bond' ) ) {
241 6         1166 my $bond_type = $moiety->get_edge_attribute( @$bond, 'bond' );
242 6 100       1164 if( $bond_type eq '=' ) {
    50          
243             # Test cis/trans bonds
244             # FIXME: Not sure how to check which definition belongs to
245             # which of the double bonds. See COD entry 1547257.
246 1         3 for my $atom (@$bond) {
247 2         12 my %bond_types = _neighbours_per_bond_type( $moiety,
248             $atom );
249 2         6 foreach ('/', '\\') {
250 4 100 100     27 if( $bond_types{$_} && @{$bond_types{$_}} > 1 ) {
  3         15  
251             warn sprintf 'atom %s(%d) has %d bonds of type \'%s\', ' .
252             'cis/trans definitions must not conflict' . "\n",
253             $atom->{symbol},
254             $atom->{number},
255 1         7 scalar @{$bond_types{$_}},
  1         19  
256             $_;
257             }
258             }
259             }
260             } elsif( $bond_type =~ /^[\\\/]$/ ) {
261             # Test if next to a double bond.
262             # FIXME: Yields false-positives for delocalised bonds,
263             # see COD entry 1501863.
264             # FIXME: What about triple bond? See COD entry 4103591.
265 5         11 my %bond_types;
266 5         13 for my $atom (@$bond) {
267 10         23 my %bond_types_now = _neighbours_per_bond_type( $moiety,
268             $atom );
269 10         27 for my $key (keys %bond_types_now) {
270 22         26 push @{$bond_types{$key}}, @{$bond_types_now{$key}};
  22         52  
  22         89  
271             }
272             }
273 5 100       20 if( !$bond_types{'='} ) {
274             warn sprintf 'cis/trans bond is defined between atoms ' .
275             '%s(%d) and %s(%d), but neither of them ' .
276             'is attached to a double bond' . "\n",
277             $A->{symbol},
278             $A->{number},
279             $B->{symbol},
280 1         12 $B->{number};
281             }
282             }
283             }
284             }
285              
286             # TODO: SP, TB, OH chiral centers
287             }
288              
289             sub _neighbours_per_bond_type
290             {
291 12     12   29 my( $moiety, $atom ) = @_;
292 12         22 my %bond_types;
293 12         33 for my $neighbour ($moiety->neighbours($atom)) {
294 38         1355 my $bond_type;
295 38 100       89 if( $moiety->has_edge_attribute( $atom, $neighbour, 'bond' ) ) {
296 24         4727 $bond_type = $moiety->get_edge_attribute( $atom, $neighbour, 'bond' );
297             } else {
298 14         2720 $bond_type = '';
299             }
300 38 100 100     4644 if( $bond_type =~ /^[\\\/]$/ &&
301             $atom->{number} > $neighbour->{number} ) {
302 7         23 $bond_type = toggle_cistrans $bond_type;
303             }
304 38         51 push @{$bond_types{$bond_type}}, $neighbour;
  38         112  
305             }
306 12         55 return %bond_types;
307             }
308              
309             1;
310              
311             __END__