File Coverage

blib/lib/Chemistry/File/SDF.pm
Criterion Covered Total %
statement 64 72 88.8
branch 16 24 66.6
condition n/a
subroutine 12 14 85.7
pod 7 9 77.7
total 99 119 83.1


line stmt bran cond sub pod time code
1             package Chemistry::File::SDF;
2              
3             our $VERSION = '0.22'; # VERSION
4             # $Id$
5              
6 1     1   72950 use base "Chemistry::File";
  1         11  
  1         735  
7 1     1   18165 use Chemistry::Mol;
  1         34663  
  1         60  
8 1     1   575 use Chemistry::File::MDLMol;
  1         3  
  1         11  
9 1     1   35 use strict;
  1         2  
  1         20  
10 1     1   4 use warnings;
  1         2  
  1         832  
11              
12             =head1 NAME
13              
14             Chemistry::File::SDF - MDL Structure Data File reader/writer
15              
16             =head1 SYNOPSIS
17              
18             use Chemistry::File::SDF;
19              
20             # Simple interface (all at once)
21             # read all the molecules in the file
22             my @mols = Chemistry::Mol->read('myfile.sdf');
23              
24             # assuming that the file includes a data item...
25             print $mols[0]->attr("sdf/data")->{PKA};
26              
27             # write a bunch of molecules to an SDF file
28             Chemistry::Mol->write('myfile.sdf', mols => \@mols);
29              
30             # or write just one molecule
31             $mol->write('myfile.sdf');
32              
33              
34             # Low level interface (one at a time)
35             # create reader
36             my $reader = Chemistry::Mol->file('myfile.sdf');
37             $reader->open('<');
38             while (my $mol = $reader->read_mol($reader->fh)) {
39             # do something with $mol
40             }
41              
42             =cut
43              
44             Chemistry::Mol->register_format(sdf => __PACKAGE__);
45              
46             =head1 DESCRIPTION
47              
48             MDL SDF (V2000) reader.
49              
50             This module automatically registers the 'sdf' format with Chemistry::Mol.
51              
52             The parser returns a list of Chemistry::Mol objects. SDF data can be accessed
53             by the $mol->attr method. Attribute names are stored as a hash ref at the
54             "sdf/data" attribute, as shown in the synopsis. When a data item has a single
55             line in the SDF file, the attribute is stored as a string; when there's more
56             than one line, they are stored as an array reference. The rest of the
57             information on the line that holds the field name is ignored.
58              
59             This module is part of the PerlMol project, L.
60              
61             =cut
62              
63             sub slurp_mol {
64 21     21 1 55 my ($self, $fh, %opts) = @_;
65 21 100       67 return if $fh->eof;
66 18         168 my $s;
67 18         197 while (<$fh>) {
68 1905 100       3134 last if /^\$\$\$\$/;
69 1887         3664 $s .= $_;
70             }
71 18         535 $s =~ s/\r\n?/\n/g; # normalize EOL
72 18         162 $s;
73             }
74              
75             sub skip_mol {
76 8     8 1 2008 my ($self, $fh, %opts) = @_;
77 8 100       21 return if $fh->eof;
78 7         98 while (<$fh>) {
79 771 100       1635 return 1 if /^\$\$\$\$/;
80             }
81 0         0 return 0;
82             }
83              
84             sub read_mol {
85 21     21 1 1777 my ($self, $fh, %opts) = @_;
86 21 100       71 my $s = $self->slurp_mol($fh, %opts) or return;
87 18         150 my $mol = Chemistry::File::MDLMol->parse_string($s, %opts);
88 18         993 $self->parse_data($mol, $s);
89 18         317 $mol;
90             }
91              
92             sub parse_data {
93 18     18 0 440 my ($self, $mol, $mol_string) = @_;
94 18         342 my (@items) = split /\n>/, $mol_string;
95 18         48 shift @items; # drop everything until first datum
96 18         69 my %data_block;
97 18         52 for my $item (@items) {
98 347         840 my ($header, @data) = split /\n/, $item;
99 347         1606 my ($field_name) = $header =~ /<(.*?)>/g;
100 347 50       682 warn "SDF: no field name\n", next unless $field_name;
101             #$mol->attr("sdf/$field_name", @data == 1 ? $data[0] : \@data);
102 347 50       1013 $data_block{$field_name} = @data == 1 ? $data[0] : \@data;
103            
104             }
105 18         72 $mol->attr("sdf/data", \%data_block);
106             }
107              
108             sub write_string {
109 2     2 1 3961 my ($self, $mol_ref, %opts) = @_;
110 2         8 my @mols;
111 2         4 my $ret = '';
112              
113 2 50       8 if ($opts{mols}) {
114 2         5 @mols = @{$opts{mols}};
  2         8  
115             } else {
116 0         0 @mols = $mol_ref;
117             }
118              
119 2         6 for my $mol (@mols) {
120 9         49 $ret .= $mol->print(format => 'mdl');
121 9         46 $ret .= format_data($mol->attr('sdf/data')) . '$$$$'."\n";
122             }
123 2         72 $ret;
124             }
125              
126             sub format_data {
127 9     9 0 116 my ($data) = @_;
128 9         20 my $ret = '';
129 9 50       24 return $ret unless $data;
130 9         133 for my $field_name (sort keys %$data) {
131 163         301 $ret .= "> <$field_name>\n";
132 163         269 my $value = $data->{$field_name};
133 163 50       270 if (ref $value) {
134 0         0 $ret .= join "\n", @$value;
135             } else {
136 163         275 $ret .= "$value\n";
137             }
138 163         254 $ret .= "\n";
139             }
140 9         108 $ret;
141             }
142              
143             sub file_is {
144 3     3 1 59 my ($self, $fname) = @_;
145            
146 3 50       36 return 1 if $fname =~ /\.sdf?$/i;
147 0           return 0;
148             }
149              
150             sub name_is {
151 0     0 1   my ($self, $fname) = @_;
152 0           $fname =~ /\.sdf?$/i;
153             }
154              
155             sub string_is {
156 0     0 1   my ($self, $s) = @_;
157 0 0         /\$\$\$\$/ ? 1 : 0;
158             }
159             1;
160              
161             =head1 CAVEATS
162              
163             Note that by storing the SDF data as a hash, there can be only one field with
164             a given name. The SDF format description is not entirely clear in this regard.
165             Also note that SDF data field names are considered to be case-sensitive.
166              
167             =head1 SOURCE CODE REPOSITORY
168              
169             L
170              
171             =head1 SEE ALSO
172              
173             L
174              
175             The MDL file format specification.
176             L or
177             Arthur Dalby et al., J. Chem. Inf. Comput. Sci, 1992, 32, 244-255.
178              
179             =head1 AUTHOR
180              
181             Ivan Tubert-Brohman
182              
183             =head1 COPYRIGHT
184              
185             Copyright (c) 2009 Ivan Tubert-Brohman. All rights reserved. This program is
186             free software; you can redistribute it and/or modify it under the same terms as
187             Perl itself.
188              
189             =cut
190