File Coverage

blib/lib/Chemistry/File/SDF.pm
Criterion Covered Total %
statement 64 72 88.8
branch 16 24 66.6
condition n/a
subroutine 12 14 85.7
pod 7 9 77.7
total 99 119 83.1


line stmt bran cond sub pod time code
1             package Chemistry::File::SDF;
2              
3             our $VERSION = '0.24'; # VERSION
4             # $Id$
5              
6 1     1   60400 use base "Chemistry::File";
  1         11  
  1         508  
7 1     1   15370 use Chemistry::Mol;
  1         29854  
  1         49  
8 1     1   452 use Chemistry::File::MDLMol;
  1         2  
  1         7  
9 1     1   27 use strict;
  1         3  
  1         31  
10 1     1   5 use warnings;
  1         1  
  1         725  
11              
12             =head1 NAME
13              
14             Chemistry::File::SDF - MDL Structure Data File reader/writer
15              
16             =head1 SYNOPSIS
17              
18             use Chemistry::File::SDF;
19              
20             # Simple interface (all at once)
21             # read all the molecules in the file
22             my @mols = Chemistry::Mol->read('myfile.sdf');
23              
24             # assuming that the file includes a data item...
25             print $mols[0]->attr("sdf/data")->{PKA};
26              
27             # write a bunch of molecules to an SDF file
28             Chemistry::Mol->write('myfile.sdf', mols => \@mols);
29              
30             # or write just one molecule
31             $mol->write('myfile.sdf');
32              
33              
34             # Low level interface (one at a time)
35             # create reader
36             my $reader = Chemistry::Mol->file('myfile.sdf');
37             $reader->open('<');
38             while (my $mol = $reader->read_mol($reader->fh)) {
39             # do something with $mol
40             }
41              
42             =cut
43              
44             Chemistry::Mol->register_format(sdf => __PACKAGE__);
45              
46             =head1 DESCRIPTION
47              
48             MDL SDF (V2000) reader.
49              
50             This module automatically registers the 'sdf' format with Chemistry::Mol.
51              
52             The parser returns a list of Chemistry::Mol objects. SDF data can be accessed
53             by the $mol->attr method. Attribute names are stored as a hash ref at the
54             "sdf/data" attribute, as shown in the synopsis. When a data item has a single
55             line in the SDF file, the attribute is stored as a string; when there's more
56             than one line, they are stored as an array reference. The rest of the
57             information on the line that holds the field name is ignored.
58              
59             This module is part of the PerlMol project, L.
60              
61             =cut
62              
63             sub slurp_mol {
64 21     21 1 41 my ($self, $fh, %opts) = @_;
65 21 100       49 return if $fh->eof;
66 18         168 my $s;
67 18         171 while (<$fh>) {
68 1905 100       2608 last if /^\$\$\$\$/;
69 1887         2953 $s .= $_;
70             }
71 18         435 $s =~ s/\r\n?/\n/g; # normalize EOL
72 18         138 $s;
73             }
74              
75             sub skip_mol {
76 8     8 1 1561 my ($self, $fh, %opts) = @_;
77 8 100       20 return if $fh->eof;
78 7         148 while (<$fh>) {
79 771 100       1377 return 1 if /^\$\$\$\$/;
80             }
81 0         0 return 0;
82             }
83              
84             sub read_mol {
85 21     21 1 1357 my ($self, $fh, %opts) = @_;
86 21 100       51 my $s = $self->slurp_mol($fh, %opts) or return;
87 18         128 my $mol = Chemistry::File::MDLMol->parse_string($s, %opts);
88 18         788 $self->parse_data($mol, $s);
89 18         260 $mol;
90             }
91              
92             sub parse_data {
93 18     18 0 342 my ($self, $mol, $mol_string) = @_;
94 18         228 my (@items) = split /\n>/, $mol_string;
95 18         35 shift @items; # drop everything until first datum
96 18         32 my %data_block;
97 18         30 for my $item (@items) {
98 347         717 my ($header, @data) = split /\n/, $item;
99 347         980 my ($field_name) = $header =~ /<(.*?)>/g;
100 347 50       559 warn "SDF: no field name\n", next unless $field_name;
101             #$mol->attr("sdf/$field_name", @data == 1 ? $data[0] : \@data);
102 347 50       781 $data_block{$field_name} = @data == 1 ? $data[0] : \@data;
103            
104             }
105 18         53 $mol->attr("sdf/data", \%data_block);
106             }
107              
108             sub write_string {
109 2     2 1 2608 my ($self, $mol_ref, %opts) = @_;
110 2         7 my @mols;
111 2         4 my $ret = '';
112              
113 2 50       6 if ($opts{mols}) {
114 2         3 @mols = @{$opts{mols}};
  2         6  
115             } else {
116 0         0 @mols = $mol_ref;
117             }
118              
119 2         5 for my $mol (@mols) {
120 9         31 $ret .= $mol->print(format => 'mdl');
121 9         29 $ret .= format_data($mol->attr('sdf/data')) . '$$$$'."\n";
122             }
123 2         37 $ret;
124             }
125              
126             sub format_data {
127 9     9 0 71 my ($data) = @_;
128 9         11 my $ret = '';
129 9 50       18 return $ret unless $data;
130 9         73 for my $field_name (sort keys %$data) {
131 163         258 $ret .= "> <$field_name>\n";
132 163         186 my $value = $data->{$field_name};
133 163 50       199 if (ref $value) {
134 0         0 $ret .= join "\n", @$value;
135             } else {
136 163         185 $ret .= "$value\n";
137             }
138 163         197 $ret .= "\n";
139             }
140 9         75 $ret;
141             }
142              
143             sub file_is {
144 3     3 1 41 my ($self, $fname) = @_;
145            
146 3 50       21 return 1 if $fname =~ /\.sdf?$/i;
147 0           return 0;
148             }
149              
150             sub name_is {
151 0     0 1   my ($self, $fname) = @_;
152 0           $fname =~ /\.sdf?$/i;
153             }
154              
155             sub string_is {
156 0     0 1   my ($self, $s) = @_;
157 0 0         /\$\$\$\$/ ? 1 : 0;
158             }
159             1;
160              
161             =head1 CAVEATS
162              
163             Note that by storing the SDF data as a hash, there can be only one field with
164             a given name. The SDF format description is not entirely clear in this regard.
165             Also note that SDF data field names are considered to be case-sensitive.
166              
167             =head1 SOURCE CODE REPOSITORY
168              
169             L
170              
171             =head1 SEE ALSO
172              
173             L
174              
175             The MDL file format specification.
176             L or
177             Arthur Dalby et al., J. Chem. Inf. Comput. Sci, 1992, 32, 244-255.
178              
179             =head1 AUTHOR
180              
181             Ivan Tubert-Brohman
182              
183             =head1 COPYRIGHT
184              
185             Copyright (c) 2009 Ivan Tubert-Brohman. All rights reserved. This program is
186             free software; you can redistribute it and/or modify it under the same terms as
187             Perl itself.
188              
189             =cut
190