File Coverage

blib/lib/Chemistry/File/SDF.pm
Criterion Covered Total %
statement 64 72 88.8
branch 16 24 66.6
condition n/a
subroutine 12 14 85.7
pod 7 9 77.7
total 99 119 83.1


line stmt bran cond sub pod time code
1             package Chemistry::File::SDF;
2              
3             our $VERSION = '0.23'; # VERSION
4             # $Id$
5              
6 1     1   72047 use base "Chemistry::File";
  1         12  
  1         696  
7 1     1   19093 use Chemistry::Mol;
  1         35084  
  1         60  
8 1     1   561 use Chemistry::File::MDLMol;
  1         3  
  1         8  
9 1     1   94 use strict;
  1         2  
  1         25  
10 1     1   5 use warnings;
  1         2  
  1         851  
11              
12             =head1 NAME
13              
14             Chemistry::File::SDF - MDL Structure Data File reader/writer
15              
16             =head1 SYNOPSIS
17              
18             use Chemistry::File::SDF;
19              
20             # Simple interface (all at once)
21             # read all the molecules in the file
22             my @mols = Chemistry::Mol->read('myfile.sdf');
23              
24             # assuming that the file includes a data item...
25             print $mols[0]->attr("sdf/data")->{PKA};
26              
27             # write a bunch of molecules to an SDF file
28             Chemistry::Mol->write('myfile.sdf', mols => \@mols);
29              
30             # or write just one molecule
31             $mol->write('myfile.sdf');
32              
33              
34             # Low level interface (one at a time)
35             # create reader
36             my $reader = Chemistry::Mol->file('myfile.sdf');
37             $reader->open('<');
38             while (my $mol = $reader->read_mol($reader->fh)) {
39             # do something with $mol
40             }
41              
42             =cut
43              
44             Chemistry::Mol->register_format(sdf => __PACKAGE__);
45              
46             =head1 DESCRIPTION
47              
48             MDL SDF (V2000) reader.
49              
50             This module automatically registers the 'sdf' format with Chemistry::Mol.
51              
52             The parser returns a list of Chemistry::Mol objects. SDF data can be accessed
53             by the $mol->attr method. Attribute names are stored as a hash ref at the
54             "sdf/data" attribute, as shown in the synopsis. When a data item has a single
55             line in the SDF file, the attribute is stored as a string; when there's more
56             than one line, they are stored as an array reference. The rest of the
57             information on the line that holds the field name is ignored.
58              
59             This module is part of the PerlMol project, L.
60              
61             =cut
62              
63             sub slurp_mol {
64 21     21 1 54 my ($self, $fh, %opts) = @_;
65 21 100       63 return if $fh->eof;
66 18         163 my $s;
67 18         218 while (<$fh>) {
68 1905 100       3165 last if /^\$\$\$\$/;
69 1887         3612 $s .= $_;
70             }
71 18         593 $s =~ s/\r\n?/\n/g; # normalize EOL
72 18         161 $s;
73             }
74              
75             sub skip_mol {
76 8     8 1 1945 my ($self, $fh, %opts) = @_;
77 8 100       22 return if $fh->eof;
78 7         102 while (<$fh>) {
79 771 100       1677 return 1 if /^\$\$\$\$/;
80             }
81 0         0 return 0;
82             }
83              
84             sub read_mol {
85 21     21 1 1737 my ($self, $fh, %opts) = @_;
86 21 100       74 my $s = $self->slurp_mol($fh, %opts) or return;
87 18         151 my $mol = Chemistry::File::MDLMol->parse_string($s, %opts);
88 18         935 $self->parse_data($mol, $s);
89 18         299 $mol;
90             }
91              
92             sub parse_data {
93 18     18 0 432 my ($self, $mol, $mol_string) = @_;
94 18         291 my (@items) = split /\n>/, $mol_string;
95 18         47 shift @items; # drop everything until first datum
96 18         39 my %data_block;
97 18         44 for my $item (@items) {
98 347         848 my ($header, @data) = split /\n/, $item;
99 347         1268 my ($field_name) = $header =~ /<(.*?)>/g;
100 347 50       687 warn "SDF: no field name\n", next unless $field_name;
101             #$mol->attr("sdf/$field_name", @data == 1 ? $data[0] : \@data);
102 347 50       995 $data_block{$field_name} = @data == 1 ? $data[0] : \@data;
103            
104             }
105 18         75 $mol->attr("sdf/data", \%data_block);
106             }
107              
108             sub write_string {
109 2     2 1 3826 my ($self, $mol_ref, %opts) = @_;
110 2         8 my @mols;
111 2         4 my $ret = '';
112              
113 2 50       8 if ($opts{mols}) {
114 2         4 @mols = @{$opts{mols}};
  2         7  
115             } else {
116 0         0 @mols = $mol_ref;
117             }
118              
119 2         5 for my $mol (@mols) {
120 9         41 $ret .= $mol->print(format => 'mdl');
121 9         38 $ret .= format_data($mol->attr('sdf/data')) . '$$$$'."\n";
122             }
123 2         50 $ret;
124             }
125              
126             sub format_data {
127 9     9 0 90 my ($data) = @_;
128 9         17 my $ret = '';
129 9 50       20 return $ret unless $data;
130 9         100 for my $field_name (sort keys %$data) {
131 163         253 $ret .= "> <$field_name>\n";
132 163         239 my $value = $data->{$field_name};
133 163 50       250 if (ref $value) {
134 0         0 $ret .= join "\n", @$value;
135             } else {
136 163         218 $ret .= "$value\n";
137             }
138 163         238 $ret .= "\n";
139             }
140 9         81 $ret;
141             }
142              
143             sub file_is {
144 3     3 1 55 my ($self, $fname) = @_;
145            
146 3 50       47 return 1 if $fname =~ /\.sdf?$/i;
147 0           return 0;
148             }
149              
150             sub name_is {
151 0     0 1   my ($self, $fname) = @_;
152 0           $fname =~ /\.sdf?$/i;
153             }
154              
155             sub string_is {
156 0     0 1   my ($self, $s) = @_;
157 0 0         /\$\$\$\$/ ? 1 : 0;
158             }
159             1;
160              
161             =head1 CAVEATS
162              
163             Note that by storing the SDF data as a hash, there can be only one field with
164             a given name. The SDF format description is not entirely clear in this regard.
165             Also note that SDF data field names are considered to be case-sensitive.
166              
167             =head1 SOURCE CODE REPOSITORY
168              
169             L
170              
171             =head1 SEE ALSO
172              
173             L
174              
175             The MDL file format specification.
176             L or
177             Arthur Dalby et al., J. Chem. Inf. Comput. Sci, 1992, 32, 244-255.
178              
179             =head1 AUTHOR
180              
181             Ivan Tubert-Brohman
182              
183             =head1 COPYRIGHT
184              
185             Copyright (c) 2009 Ivan Tubert-Brohman. All rights reserved. This program is
186             free software; you can redistribute it and/or modify it under the same terms as
187             Perl itself.
188              
189             =cut
190