File Coverage

Bio/Structure/IO/pdb.pm
Criterion Covered Total %
statement 592 751 78.8
branch 231 398 58.0
condition 79 165 47.8
subroutine 17 18 94.4
pod 2 2 100.0
total 921 1334 69.0


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Structure::IO::pdb
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Kris Boulez
7             #
8             # Copyright 2001, 2002 Kris Boulez
9             #
10             # Framework is a copy of Bio::SeqIO::embl.pm
11             #
12             # You may distribute this module under the same terms as perl itself
13              
14             # POD documentation - main docs before the code
15              
16             =head1 NAME
17              
18             Bio::Structure::IO::pdb - PDB input/output stream
19              
20             =head1 SYNOPSIS
21              
22             It is probably best not to use this object directly, but
23             rather go through the Bio::Structure::IO handler system. Go:
24              
25             $stream = Bio::Structure::IO->new(-file => $filename,
26             -format => 'PDB');
27              
28             while (my $structure = $stream->next_structure) {
29             # do something with $structure
30             }
31              
32             =head1 DESCRIPTION
33              
34             This object can transform Bio::Structure objects to and from PDB flat
35             file databases. The working is similar to that of the Bio::SeqIO handlers.
36              
37             =head1 FEEDBACK
38              
39             =head2 Mailing Lists
40              
41             User feedback is an integral part of the evolution of this and other
42             Bioperl modules. Send your comments and suggestions preferably to one
43             of the Bioperl mailing lists. Your participation is much appreciated.
44              
45             bioperl-l@bioperl.org - General discussion
46             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
47              
48             =head2 Support
49              
50             Please direct usage questions or support issues to the mailing list:
51              
52             I
53              
54             rather than to the module maintainer directly. Many experienced and
55             reponsive experts will be able look at the problem and quickly
56             address it. Please include a thorough description of the problem
57             with code and data examples if at all possible.
58              
59             =head2 Reporting Bugs
60              
61             Report bugs to the Bioperl bug tracking system to help us keep track
62             the bugs and their resolution. Bug reports can be submitted via the
63             web:
64              
65             https://github.com/bioperl/bioperl-live/issues
66              
67             =head1 AUTHOR - Kris Boulez
68              
69             Email kris.boulez@algonomics.com
70              
71             =head1 APPENDIX
72              
73             The rest of the documentation details each of the object methods.
74             Internal methods are usually preceded with a _
75              
76             =cut
77              
78              
79             # Let the code begin...
80              
81              
82             package Bio::Structure::IO::pdb;
83 1     1   3 use strict;
  1         2  
  1         23  
84 1     1   310 use Bio::Structure::Entry;
  1         1  
  1         24  
85             #use Bio::Structure::Model;
86             #use Bio::Structure::Chain;
87             #use Bio::Structure::Residue;
88 1     1   306 use Bio::Structure::Atom;
  1         2  
  1         18  
89 1     1   359 use Bio::SeqFeature::Generic;
  1         1  
  1         25  
90 1     1   298 use Bio::Annotation::Reference;
  1         2  
  1         25  
91              
92 1     1   4 use base qw(Bio::Structure::IO);
  1         1  
  1         5604  
93              
94             sub _initialize {
95 3     3   5 my($self,@args) = @_;
96              
97 3         14 $self->SUPER::_initialize(@args);
98              
99 3         10 my ($noheader, $noatom) =
100             $self->_rearrange([qw(
101             NOHEADER
102             NOATOM
103             )],
104             @args);
105 3 50       8 $noheader && $self->_noheader($noheader);
106 3 50       8 $noatom && $self->_noatom($noatom);
107             }
108              
109              
110             =head2 next_structure;
111              
112             Title : next_structure
113             Usage : $struc = $stream->next_structure()
114             Function: returns the next structure in the stream
115             Returns : Bio::Structure object
116             Args :
117              
118             =cut
119              
120             sub next_structure {
121 2     2 1 4 my ($self,@args) = @_;
122 2         2 my ($line);
123 2         2 my ($obslte, $title, $caveat, $compnd, $source, $keywds,
124             $expdta, $author, %revdat, $revdat, $sprsde, $jrnl, %remark, $dbref,
125             $turn, $ssbond, $link, $hydbnd, $sltbrg, $cispep,
126             $site, $cryst1, $tvect,);
127 2         15 my $struc = Bio::Structure::Entry->new(-id => 'created from pdb.pm');
128 2         6 my $all_headers = ( !$self->_noheader ); # we'll parse all headers and store as annotation
129 2         2 my %header; # stores all header RECORDs an is stored as annotations when ATOM is reached
130              
131              
132 2         8 $line = $self->_readline; # This needs to be before the first eof() test
133              
134 2 50       4 if( !defined $line ) {
135 0         0 return; # no throws - end of file
136             }
137              
138 2 50       9 if( $line =~ /^\s+$/ ) {
139 0         0 while( defined ($line = $self->_readline) ) {
140 0 0       0 $line =~/\S/ && last;
141             }
142             }
143 2 50       3 if( !defined $line ) {
144 0         0 return; # end of file
145             }
146 2 50       8 $line =~ /^HEADER\s+\S+/ || $self->throw("PDB stream with no HEADER. Not pdb in my book");
147 2         8 my($header_line) = unpack "x10 a56", $line;
148 2         3 $header{'header'} = $header_line;
149 2         6 my($class, $depdate, $idcode) = unpack "x10 a40 a9 x3 a4", $line;
150 2         8 $idcode =~ s/^\s*(\S+)\s*$/$1/;
151 2         4 $struc->id($idcode);
152 2         11 $self->debug("PBD c $class d $depdate id $idcode\n"); # XXX KB
153              
154 2         2 my $buffer = $line;
155              
156             BEFORE_COORDINATES :
157 2         5 until( !defined $buffer ) {
158 292         205 $_ = $buffer;
159              
160             # Exit at start of coordinate section
161 292 100       508 last if /^(MODEL|ATOM|HETATM)/;
162              
163             # OBSLTE line(s)
164 290 50 33     389 if (/^OBSLTE / && $all_headers) {
165 0         0 $obslte = $self->_read_PDB_singlecontline("OBSLTE","12-70",\$buffer);
166 0         0 $header{'obslte'} = $obslte;
167             }
168              
169             # TITLE line(s)
170 290 100 66     393 if (/^TITLE / && $all_headers) {
171 1         4 $title = $self->_read_PDB_singlecontline("TITLE","11-70",\$buffer);
172 1         2 $header{'title'} = $title;
173             }
174              
175             # CAVEAT line(s)
176 290 50 33     373 if (/^CAVEAT / && $all_headers) {
177 0         0 $caveat = $self->_read_PDB_singlecontline("CAVEAT","12-70",\$buffer);
178 0         0 $header{'caveat'} = $caveat;
179             }
180              
181             # COMPND line(s)
182 290 100 66     364 if (/^COMPND / && $all_headers) {
183 2         4 $compnd = $self->_read_PDB_singlecontline("COMPND","11-70",\$buffer);
184 2         3 $header{'compnd'} = $compnd;
185 2         23 $self->debug("get COMPND $compnd\n");
186             }
187              
188             # SOURCE line(s)
189 290 100 66     398 if (/^SOURCE / && $all_headers) {
190 2         4 $source = $self->_read_PDB_singlecontline("SOURCE","11-70",\$buffer);
191 2         4 $header{'source'} = $source;
192             }
193              
194             # KEYWDS line(s)
195 290 100 66     381 if (/^KEYWDS / && $all_headers) {
196 1         4 $keywds = $self->_read_PDB_singlecontline("KEYWDS","11-70",\$buffer);
197 1         2 $header{'keywds'} = $keywds;
198             }
199              
200             # EXPDTA line(s)
201 290 100 66     396 if (/^EXPDTA / && $all_headers) {
202 1         3 $expdta = $self->_read_PDB_singlecontline("EXPDTA","11-70",\$buffer);
203 1         3 $header{'expdta'} = $expdta;
204             }
205              
206             # AUTHOR line(s)
207 290 100 66     384 if (/^AUTHOR / && $all_headers) {
208 2         4 $author = $self->_read_PDB_singlecontline("AUTHOR","11-70",\$buffer);
209 2         3 $header{'author'} = $author;
210             }
211              
212             # REVDAT line(s)
213             # a bit more elaborate as we also store the modification number
214 290 100 66     378 if (/^REVDAT / && $all_headers) {
215             ##my($modnum,$rol) = unpack "x7 A3 x3 A53", $_;
216             ##$modnum =~ s/\s+//; # remove spaces
217             ##$revdat{$modnum} .= $rol;
218 3         6 my ($rol) = unpack "x7 a59", $_;
219 3         6 $revdat .= $rol;
220 3         4 $header{'revdat'} = $revdat;
221             }
222              
223             # SPRSDE line(s)
224 290 50 33     401 if (/^SPRSDE / && $all_headers) {
225 0         0 $sprsde = $self->_read_PDB_singlecontline("SPRSDE","12-70",\$buffer);
226 0         0 $header{'sprsde'} = $sprsde;
227             }
228              
229             # jRNL line(s)
230 290 100 66     385 if (/^JRNL / && $all_headers) {
231 2         5 $jrnl = $self->_read_PDB_jrnl(\$buffer);
232 2         7 $struc->annotation->add_Annotation('reference',$jrnl);
233 2         2 $header{'jrnl'} = 1; # when writing out, we need a way to check there was a JRNL record (not mandatory)
234             }
235              
236             # REMARK line(s)
237             # we only parse the "REMARK 1" lines (additional references)
238             # thre rest is stored in %remark (indexed on remarkNum) (pack does space-padding)
239 290 100 66     896 if (/^REMARK\s+(\d+)\s*/ && $all_headers) {
240 240         259 my $remark_num = $1;
241 240 100       349 if ($remark_num == 1) {
242 2         6 my @refs = $self->_read_PDB_remark_1(\$buffer);
243             # How can we find the primary reference when writing (JRNL record) XXX KB
244 2         4 foreach my $ref (@refs) {
245 8         10 $struc->annotation->add_Annotation('reference', $ref);
246             }
247             # $_ still holds the REMARK_1 line, $buffer now contains the first non
248             # REMARK_1 line. We need to parse it in this pass (so no else block)
249 2         3 $_ = $buffer;
250             }
251             # for the moment I don't see a better solution (other then using goto)
252 240 50       423 if (/^REMARK\s+(\d+)\s*/) {
253 240         197 my $r_num = $1;
254 240 50       269 if ($r_num != 1) { # other remarks, we store literlly at the moment
255 240         391 my ($rol) = unpack "x11 a59", $_;
256 240         406 $remark{$r_num} .= $rol;
257             }
258             }
259             } # REMARK
260              
261             # DBREF line(s)
262             # references to sequences in other databases
263             # we store as 'dblink' annotations and whole line as simple annotation (round-trip)
264 290 100 66     429 if (/^DBREF / && $all_headers) {
265 3         8 my ($rol) = unpack "x7 a61", $_;
266 3         5 $dbref .= $rol;
267 3         4 $header{'dbref'} = $dbref;
268 3         7 my ($db, $acc) = unpack "x26 a6 x1 a8", $_;
269 3         8 $db =~ s/\s*$//;
270 3         7 $acc =~ s/\s*$//;
271 3         10 my $link = Bio::Annotation::DBLink->new;
272 3         6 $link->database($db);
273 3         5 $link->primary_id($acc);
274 3         6 $struc->annotation->add_Annotation('dblink', $link);
275             } # DBREF
276              
277             # SEQADV line(s)
278 290 50 33     396 if (/^SEQADV / && $all_headers) {
279 0         0 my ($rol) = unpack "x7 a63", $_;
280 0         0 $header{'seqadv'} .= $rol;
281             } # SEQADV
282              
283             # SEQRES line(s)
284             # this is (I think) the sequence of macromolecule that was analysed
285             # this will be returned when doing $struc->seq
286 290 100 66     434 if (/^SEQRES / && $all_headers) {
287 8         16 my ($rol) = unpack "x8 a62", $_;
288 8         11 $header{'seqres'} .= $rol;
289             } # SEQRES
290              
291             # MODRES line(s)
292 290 50 33     381 if (/^MODRES / && $all_headers) {
293 0         0 my ($rol) = unpack "x7 a63", $_;
294 0         0 $header{'modres'} .= $rol;
295             } # MODRES
296              
297             # HET line(s)
298 290 100 66     404 if (/^HET / && $all_headers) {
299 3         6 my ($rol) = unpack "x7 a63", $_;
300 3         10 $header{'het'} .= $rol;
301             } # HET
302              
303             # HETNAM line(s)
304 290 100 66     373 if (/^HETNAM / && $all_headers) {
305 1         2 my ($rol) = unpack "x8 a62", $_;
306 1         3 $header{'hetnam'} .= $rol;
307             } # HETNAM
308              
309             # HETSYN line(s)
310 290 50 33     384 if (/^HETSYN / && $all_headers) {
311 0         0 my ($rol) = unpack "x8 a62", $_;
312 0         0 $header{'hetsyn'} .= $rol;
313             } # HETSYN
314              
315             # FORMUL line(s)
316 290 100 66     364 if (/^FORMUL / && $all_headers) {
317 4         8 my ($rol) = unpack "x8 a62", $_;
318 4         6 $header{'formul'} .= $rol;
319             } # FORMUL
320              
321             # HELIX line(s)
322             # store as specific object ??
323 290 100 66     420 if (/^HELIX / && $all_headers) {
324 2         4 my ($rol) = unpack "x7 a69", $_;
325 2         3 $header{'helix'} .= $rol;
326             } # HELIX
327              
328             # SHEET line(s)
329             # store as specific object ??
330 290 100 66     374 if (/^SHEET / && $all_headers) {
331 3         5 my ($rol) = unpack "x7 a63", $_;
332 3         4 $header{'sheet'} .= $rol;
333             } # SHEET
334              
335             # TURN line(s)
336             # store as specific object ??
337 290 50 33     403 if (/^TURN / && $all_headers) {
338 0         0 my ($rol) = unpack "x7 a63", $_;
339 0         0 $turn .= $rol;
340 0         0 $header{'turn'} = $turn;
341             } # TURN
342              
343             # SSBOND line(s)
344             # store in connection-like object (see parsing of CONECT record)
345 290 100 66     413 if (/^SSBOND / && $all_headers) {
346 3         5 my ($rol) = unpack "x7 a65", $_;
347 3         5 $ssbond .= $rol;
348 3         3 $header{'ssbond'} = $ssbond;
349             } # SSBOND
350              
351             # LINK
352             # store like SSBOND ?
353 290 100 66     431 if (/^LINK / && $all_headers) {
354 3         5 my ($rol) = unpack "x12 a60", $_;
355 3         6 $link .= $rol;
356 3         4 $header{'link'} = $link;
357             } # LINK
358              
359             # HYDBND
360             # store like SSBOND
361 290 50 33     386 if (/^HYDBND / && $all_headers) {
362 0         0 my ($rol) = unpack "x12 a60", $_;
363 0         0 $hydbnd .= $rol;
364 0         0 $header{'hydbnd'} = $hydbnd;
365             } # HYDBND
366              
367             # SLTBRG
368             # store like SSBOND ?
369 290 50 33     378 if (/^SLTBRG / && $all_headers) {
370 0         0 my ($rol) = unpack "x12 a60",$_;
371 0         0 $sltbrg .= $rol;
372 0         0 $header{'sltbrg'} = $sltbrg;
373             } # SLTBRG
374              
375             # CISPEP
376             # store like SSBOND ?
377 290 50 33     424 if (/^CISPEP / && $all_headers) {
378 0         0 my ($rol) = unpack "x7 a52", $_;
379 0         0 $cispep .= $rol;
380 0         0 $header{'cispep'} = $cispep;
381             }
382              
383             # SITE line(s)
384 290 50 33     384 if (/^SITE / && $all_headers) {
385 0         0 my ($rol) = unpack "x7 a54", $_;
386 0         0 $site .= $rol;
387 0         0 $header{'site'} = $site;
388             } # SITE
389              
390             # CRYST1 line
391             # store in some crystallographic subobject ?
392 290 100 66     412 if (/^CRYST1/ && $all_headers) {
393 2         5 my ($rol) = unpack "x6 a64", $_;
394 2         3 $cryst1 .= $rol;
395 2         3 $header{'cryst1'} = $cryst1;
396             } # CRYST1
397              
398             # ORIGXn line(s) (n=1,2,3)
399 290 100 66     410 if (/^(ORIGX\d) / && $all_headers) {
400 6         10 my $origxn = lc($1);
401 6         12 my ($rol) = unpack "x10 a45", $_;
402 6         12 $header{$origxn} .= $rol;
403             } # ORIGXn
404              
405             # SCALEn line(s) (n=1,2,3)
406 290 100 66     389 if (/^(SCALE\d) / && $all_headers) {
407 6         9 my $scalen = lc($1);
408 6         12 my ($rol) = unpack "x10 a45", $_;
409 6         13 $header{$scalen} .= $rol;
410             } # SCALEn
411              
412             # MTRIXn line(s) (n=1,2,3)
413 290 50 33     392 if (/^(MTRIX\d) / && $all_headers) {
414 0         0 my $mtrixn = lc($1);
415 0         0 my ($rol) = unpack "x7 a53", $_;
416 0         0 $header{$mtrixn} .= $rol;
417             } # MTRIXn
418              
419             # TVECT line(s)
420 290 100 66     408 if (/^TVECT / && $all_headers) {
421 1         2 my ($rol) = unpack "x7 a63", $_;
422 1         2 $tvect .= $rol;
423 1         1 $header{'tvect'} = $tvect;
424             }
425              
426             # Get next line.
427 290         398 $buffer = $self->_readline;
428             }
429              
430             # store %header entries a annotations
431 2 50       5 if (%header) {
432 2         13 for my $record (keys %header) {
433 42         58 my $sim = Bio::Annotation::SimpleValue->new();
434 42         59 $sim->value($header{$record});
435 42         49 $struc->annotation->add_Annotation($record, $sim);
436             }
437             }
438             # store %remark entries as annotations
439 2 50       6 if (%remark) {
440 2         5 for my $remark_num (keys %remark) {
441 17         23 my $sim = Bio::Annotation::SimpleValue->new();
442 17         21 $sim->value($remark{$remark_num});
443 17         21 $struc->annotation->add_Annotation("remark_$remark_num", $sim);
444             }
445             }
446              
447             # Coordinate section, the real meat
448             #
449             # $_ contains a line beginning with (ATOM|MODEL)
450              
451 2         3 $buffer = $_;
452              
453              
454 2 50 33     15 if (defined($buffer) && $buffer =~ /^(ATOM |MODEL |HETATM)/ ) { # can you have an entry without ATOM ?
455 2         5 while( defined ($buffer) ) { # (yes : 1a7z )
456             # read in one model at a time
457 2         5 my $model = $self->_read_PDB_coordinate_section(\$buffer, $struc);
458             # add this to $struc
459 2         9 $struc->add_model($struc, $model);
460              
461 2 50 33     16 if ($buffer && $buffer !~ /^MODEL /) { # if we get here we have multiple MODELs
462 2         5 last;
463             }
464             }
465             }
466             else {
467 0         0 $self->throw("Could not find a coordinate section in this record\n");
468             }
469              
470              
471 2         5 until( !defined $buffer ) {
472 23         16 $_ = $buffer;
473              
474             # CONNECT records
475 23 100       50 if (/^CONECT/) {
476             # do not differentiate between different type of connect (column dependant)
477 19         17 my $conect_unpack = "x6 a5 a5 a5 a5 a5 a5 a5 a5 a5 a5 a5";
478 19         71 my (@conect) = unpack $conect_unpack, $_;
479 19         34 for my $k (0 .. $#conect) {
480 209         396 $conect[$k] =~ s/\s//g;
481             }
482 19         16 my $source = shift @conect;
483 19         11 my $type;
484 19         20 for my $k (0 .. 9) {
485 190 100       261 next unless ($conect[$k] =~ /^\d+$/);
486             # 0..3 bond
487 32 50 0     33 if( $k <= 3 ) {
    0 0        
    0 0        
      0        
488 32         27 $type = "bond";
489             }
490             # 4..5,7..8 hydrogen bonded
491             elsif( ($k >= 4 && $k <= 5) || ($k >= 7 && $k <= 8) ) {
492 0         0 $type = "hydrogen";
493             }
494             # 6, 9 salt bridged
495             elsif( $k == 6 || $k == 9 ) {
496 0         0 $type = "saltbridged";
497             } else {
498 0         0 $self->throw("k has impossible value ($k), check brain");
499             }
500 32         47 $struc->conect($source, $conect[$k], $type);
501             }
502             }
503              
504             # MASTER record
505 23 100       30 if (/^MASTER /) {
506             # the numbers in here a checksums, we should use them :)
507 2         5 my ($rol) = unpack "x10 a60", $_;
508 2         9 $struc->master($rol);
509             }
510              
511 23 100       39 if (/^END/) {
512             # this it the end ...
513             }
514              
515 23         39 $buffer = $self->_readline;
516             }
517              
518              
519 2         38 return $struc;
520             }
521              
522             =head2 write_structure
523              
524             Title : write_structure
525             Usage : $stream->write_structure($struc)
526             Function: writes the $struc object (must be a Bio::Structure) to the stream
527             Returns : 1 for success and 0 for error
528             Args : Bio::Structure object
529              
530             =cut
531              
532             sub write_structure {
533 1     1 1 5 my ($self, $struc) = @_;
534 1 50       4 if( !defined $struc ) {
535 0         0 $self->throw("Attempting to write with no structure!");
536             }
537              
538 1 50 33     9 if( ! ref $struc || ! $struc->isa('Bio::Structure::StructureI') ) {
539 0         0 $self->throw(" $struc is not a StructureI compliant module.");
540             }
541 1         2 my ($ann, $string, $output_string, $key);
542             # HEADER
543 1         3 ($ann) = $struc->annotation->get_Annotations("header");
544 1 50       3 if (defined $ann) {
545 1         4 $string = $ann->as_text;
546 1         4 $string =~ s/^Value: //;
547 1         10 $output_string = pack ("A10 A56", "HEADER", $string);
548             } else { # not read in via read_structure, create HEADER line
549 0         0 my $id = $struc->id;
550 0 0       0 if (!$id) {
551 0         0 $id = "UNK1";
552             }
553 0 0       0 if (length($id) > 4) {
554 0         0 $id = substr($id,0,4);
555             }
556 0         0 my $classification = "DEFAULT CLASSIFICATION";
557 0         0 my $dep_date = "24-JAN-70";
558 0         0 $output_string = pack ("A10 A40 A12 A4", "HEADER", $classification, $dep_date, $id);
559             }
560 1         4 $output_string .= " " x (80 - length($output_string) );
561 1         17 $self->_print("$output_string\n");
562              
563 1         1 my (%header);
564 1         4 for $key ($struc->annotation->get_all_annotation_keys) {
565 38         34 $header{$key} = 1;;
566             }
567              
568 1 50       4 exists $header{'obslte'} && $self->_write_PDB_simple_record(-name => "OBSLTE ", -cont => "9-10",
569             -annotation => $struc->annotation->get_Annotations("obslte"), -rol => "11-70");
570              
571 1 50       6 exists $header{'title'} && $self->_write_PDB_simple_record(-name => "TITLE ", -cont => "9-10",
572             -annotation => $struc->annotation->get_Annotations("title"), -rol => "11-70");
573              
574 1 50       4 exists $header{'caveat'} && $self->_write_PDB_simple_record(-name => "CAVEAT ", -cont => "9-10",
575             -annotation => $struc->annotation->get_Annotations("caveat"), -rol => "12-70");
576              
577 1 50       7 exists $header{'compnd'} && $self->_write_PDB_simple_record(-name => "COMPND ", -cont => "9-10",
578             -annotation => $struc->annotation->get_Annotations("compnd"), -rol => "11-70");
579              
580 1 50       7 exists $header{'source'} && $self->_write_PDB_simple_record(-name => "SOURCE ", -cont => "9-10",
581             -annotation => $struc->annotation->get_Annotations("source"), -rol => "11-70");
582              
583 1 50       7 exists $header{'keywds'} && $self->_write_PDB_simple_record(-name => "KEYWDS ", -cont => "9-10",
584             -annotation => $struc->annotation->get_Annotations("keywds"), -rol => "11-70");
585              
586 1 50       7 exists $header{'expdta'} && $self->_write_PDB_simple_record(-name => "EXPDTA ", -cont => "9-10",
587             -annotation => $struc->annotation->get_Annotations("expdta"), -rol => "11-70");
588              
589 1 50       8 exists $header{'author'} && $self->_write_PDB_simple_record(-name => "AUTHOR ", -cont => "9-10",
590             -annotation => $struc->annotation->get_Annotations("author"), -rol => "11-70");
591              
592 1 50       8 exists $header{'revdat'} && $self->_write_PDB_simple_record(-name => "REVDAT ",
593             -annotation => $struc->annotation->get_Annotations("revdat"), -rol => "8-66");
594              
595 1 50       4 exists $header{'sprsde'} && $self->_write_PDB_simple_record(-name => "SPRSDE ", -cont => "9-10",
596             -annotation => $struc->annotation->get_Annotations("sprsde"), -rol => "12-70");
597              
598             # JRNL en REMARK 1
599 1         2 my ($jrnl_done, $remark_1_counter);
600 1 50       4 if ( !exists $header{'jrnl'} ) {
601 0         0 $jrnl_done = 1;
602             }
603 1         4 foreach my $ref ($struc->annotation->get_Annotations('reference') ) {
604 1 50       3 if( !$jrnl_done ) { # JRNL record
605 1 50       5 $ref->authors && $self->_write_PDB_simple_record(-name => "JRNL AUTH",
606             -cont => "17-18", -rol => "20-70", -string => $ref->authors );
607 1 50       5 $ref->title && $self->_write_PDB_simple_record(-name => "JRNL TITL",
608             -cont => "17-18", -rol => "20-70", -string => $ref->title );
609 1 50       4 $ref->editors && $self->_write_PDB_simple_record(-name => "JRNL EDIT",
610             -cont => "17-18", -rol => "20-70", -string => $ref->editors );
611 1 50       5 $ref->location && $self->_write_PDB_simple_record(-name => "JRNL REF ",
612             -cont => "17-18", -rol => "20-70", -string => $ref->location );
613 1 50       3 $ref->editors && $self->_write_PDB_simple_record(-name => "JRNL EDIT",
614             -cont => "17-18", -rol => "20-70", -string => $ref->editors );
615 1 50       44 $ref->encoded_ref && $self->_write_PDB_simple_record(-name => "JRNL REFN",
616             -cont => "17-18", -rol => "20-70", -string => $ref->encoded_ref );
617 1         4 $jrnl_done = 1;
618             } else { # REMARK 1
619 0 0       0 if (!$remark_1_counter) { # header line
620 0         0 my $remark_1_header_line = "REMARK 1" . " " x 70;
621 0         0 $self->_print("$remark_1_header_line\n");
622 0         0 $remark_1_counter = 1;
623             }
624             # per reference header
625 0         0 my $rem_line = "REMARK 1 REFERENCE " . $remark_1_counter;
626 0         0 $rem_line .= " " x (80 - length($rem_line) );
627 0         0 $self->_print($rem_line,"\n");
628 0 0       0 $ref->authors && $self->_write_PDB_simple_record(-name => "REMARK 1 AUTH",
629             -cont => "17-18", -rol => "20-70", -string => $ref->authors );
630 0 0       0 $ref->title && $self->_write_PDB_simple_record(-name => "REMARK 1 TITL",
631             -cont => "17-18", -rol => "20-70", -string => $ref->title );
632 0 0       0 $ref->editors && $self->_write_PDB_simple_record(-name => "REMARK 1 EDIT",
633             -cont => "17-18", -rol => "20-70", -string => $ref->editors );
634 0 0       0 $ref->location && $self->_write_PDB_simple_record(-name => "REMARK 1 REF ",
635             -cont => "17-18", -rol => "20-70", -string => $ref->location );
636 0 0       0 $ref->editors && $self->_write_PDB_simple_record(-name => "REMARK 1 EDIT",
637             -cont => "17-18", -rol => "20-70", -string => $ref->editors );
638 0 0       0 $ref->encoded_ref && $self->_write_PDB_simple_record(-name => "REMARK 1 REFN",
639             -cont => "17-18", -rol => "20-70", -string => $ref->encoded_ref );
640 0         0 $remark_1_counter++;
641             }
642             }
643 1 50       3 if (! defined $remark_1_counter ) { # no remark 1 record written yet
644 1         2 my $remark_1_header_line = "REMARK 1" . " " x 70;
645 1         3 $self->_print("$remark_1_header_line\n"); # write dummy (we need this line)
646             }
647              
648             # REMARK's (not 1 at the moment, references)
649 1         1 my (%remarks, $remark_num);
650 1         6 for $key (keys %header) {
651 38 100       51 next unless ($key =~ /^remark_(\d+)$/);
652 13 50       20 next if ($1 == 1);
653 13         15 $remarks{$1} = 1;
654             }
655 1         7 for $remark_num (sort {$a <=> $b} keys %remarks) {
  32         23  
656 13         17 $self->_write_PDB_remark_record($struc, $remark_num);
657             }
658              
659 1 50       8 exists $header{'dbref'} && $self->_write_PDB_simple_record(-name => "DBREF ",
660             -annotation => $struc->annotation->get_Annotations("dbref"), -rol => "8-68");
661 1 50       4 exists $header{'seqadv'} && $self->_write_PDB_simple_record(-name => "SEQADV ",
662             -annotation => $struc->annotation->get_Annotations("seqadv"), -rol => "8-70");
663 1 50       6 exists $header{'seqres'} && $self->_write_PDB_simple_record(-name => "SEQRES ",
664             -annotation => $struc->annotation->get_Annotations("seqres"), -rol => "9-70");
665 1 50       4 exists $header{'modres'} && $self->_write_PDB_simple_record(-name => "MODRES ",
666             -annotation => $struc->annotation->get_Annotations("modres"), -rol => "8-70");
667 1 50       6 exists $header{'het'} && $self->_write_PDB_simple_record(-name => "HET ",
668             -annotation => $struc->annotation->get_Annotations("het"), -rol => "8-70");
669 1 50       7 exists $header{'hetnam'} && $self->_write_PDB_simple_record(-name => "HETNAM ",
670             -annotation => $struc->annotation->get_Annotations("hetnam"), -rol => "9-70");
671 1 50       4 exists $header{'hetsyn'} && $self->_write_PDB_simple_record(-name => "HETSYN ",
672             -annotation => $struc->annotation->get_Annotations("hetsyn"), -rol => "9-70");
673 1 50       7 exists $header{'formul'} && $self->_write_PDB_simple_record(-name => "FORMUL ",
674             -annotation => $struc->annotation->get_Annotations("formul"), -rol => "9-70");
675 1 50       5 exists $header{'helix'} && $self->_write_PDB_simple_record(-name => "HELIX ",
676             -annotation => $struc->annotation->get_Annotations("helix"), -rol => "8-76");
677 1 50       4 exists $header{'sheet'} && $self->_write_PDB_simple_record(-name => "SHEET ",
678             -annotation => $struc->annotation->get_Annotations("sheet"), -rol => "8-70");
679 1 50       4 exists $header{'turn'} && $self->_write_PDB_simple_record(-name => "TURN ",
680             -annotation => $struc->annotation->get_Annotations("turn"), -rol => "8-70");
681 1 50       3 exists $header{'ssbond'} && $self->_write_PDB_simple_record(-name => "SSBOND ",
682             -annotation => $struc->annotation->get_Annotations("ssbond"), -rol => "8-72");
683 1 50       6 exists $header{'link'} && $self->_write_PDB_simple_record(-name => "LINK ",
684             -annotation => $struc->annotation->get_Annotations("link"), -rol => "13-72");
685 1 50       4 exists $header{'hydbnd'} && $self->_write_PDB_simple_record(-name => "HYDBND ",
686             -annotation => $struc->annotation->get_Annotations("hydbnd"), -rol => "13-72");
687 1 50       3 exists $header{'sltbrg'} && $self->_write_PDB_simple_record(-name => "SLTBRG ",
688             -annotation => $struc->annotation->get_Annotations("sltbrg"), -rol => "13-72");
689 1 50       4 exists $header{'cispep'} && $self->_write_PDB_simple_record(-name => "CISPEP ",
690             -annotation => $struc->annotation->get_Annotations("cispep"), -rol => "8-59");
691 1 50       3 exists $header{'site'} && $self->_write_PDB_simple_record(-name => "SITE ",
692             -annotation => $struc->annotation->get_Annotations("site"), -rol => "8-61");
693 1 50       6 exists $header{'cryst1'} && $self->_write_PDB_simple_record(-name => "CRYST1",
694             -annotation => $struc->annotation->get_Annotations("cryst1"), -rol => "7-70");
695 1         4 for my $k (1..3) {
696 3         4 my $origxn = "origx".$k;
697 3         5 my $ORIGXN = uc($origxn)." ";
698 3 50       12 exists $header{$origxn} && $self->_write_PDB_simple_record(-name => $ORIGXN,
699             -annotation => $struc->annotation->get_Annotations($origxn), -rol => "11-55");
700             }
701 1         3 for my $k (1..3) {
702 3         5 my $scalen = "scale".$k;
703 3         4 my $SCALEN = uc($scalen)." ";
704 3 50       12 exists $header{$scalen} && $self->_write_PDB_simple_record(-name => $SCALEN,
705             -annotation => $struc->annotation->get_Annotations($scalen), -rol => "11-55");
706             }
707 1         3 for my $k (1..3) {
708 3         2 my $mtrixn = "mtrix".$k;
709 3         4 my $MTRIXN = uc($mtrixn)." ";
710 3 50       7 exists $header{$mtrixn} && $self->_write_PDB_simple_record(-name => $MTRIXN,
711             -annotation => $struc->annotation->get_Annotations($mtrixn), -rol => "8-60");
712             }
713 1 50       7 exists $header{'tvect'} && $self->_write_PDB_simple_record(-name => "TVECT ",
714             -annotation => $struc->annotation->get_Annotations("tvect"), -rol => "8-70");
715              
716             # write out coordinate section
717             #
718 1         1 my %het_res; # hetero residues
719 1         3 $het_res{'HOH'} = 1; # water is default
720 1 50       5 if (exists $header{'het'}) {
721 1         3 my ($het_line) = ($struc->annotation->get_Annotations("het"))[0]->as_text;
722 1         4 $het_line =~ s/^Value: //;
723 1         4 for ( my $k = 0; $k <= length $het_line ; $k += 63) {
724 3         4 my $l = substr $het_line, $k, 63;
725 3         8 $l =~ s/^\s*(\S+)\s+.*$/$1/;
726 3         7 $het_res{$l} = 1;
727             }
728             }
729 1         4 for my $model ($struc->get_models) {
730             # more then one model ?
731 1 50       2 if ($struc->get_models > 1) {
732 0         0 my $model_line = sprintf("MODEL %4d", $model->id);
733 0         0 $model_line .= " " x (80 - length($model_line) );
734 0         0 $self->_print($model_line, "\n");
735             }
736 1         3 for my $chain ($struc->get_chains($model)) {
737 4         4 my ($residue, $atom, $resname, $resnum, $atom_line, $atom_serial, $atom_icode, $chain_id);
738 0         0 my ($prev_resname, $prev_resnum, $prev_atomicode); # need these for TER record
739 4         5 my $last_record = ""; # Used to spot an ATOM -> HETATM change within a chain
740 4         10 $chain_id = $chain->id;
741 4 100       9 if ( $chain_id eq "default" ) {
742 1         2 $chain_id = " ";
743             }
744 4         20 $self->debug("model_id: $model->id chain_id: $chain_id\n");
745 4         10 for $residue ($struc->get_residues($chain)) {
746 60         340 ($resname, $resnum) = split /-/, $residue->id;
747 60         117 for $atom ($struc->get_atoms($residue)) {
748 171 100       205 if ($het_res{$resname}) { # HETATM
749 45 50 66     89 if ( $resname ne "HOH" && $last_record eq "ATOM " ) {
750             # going from ATOM -> HETATM, we have to write TER
751 0         0 my $ter_line = "TER ";
752 0         0 $ter_line .= sprintf("%5d", $atom_serial + 1);
753 0         0 $ter_line .= " ";
754 0         0 $ter_line .= sprintf("%3s ", $prev_resname);
755 0         0 $ter_line .= $chain_id;
756 0         0 $ter_line .= sprintf("%4d", $prev_resnum);
757 0 0       0 $ter_line .= $atom_icode ? $prev_atomicode : " "; # 27
758 0         0 $ter_line .= " " x (80 - length $ter_line); # extend to 80 chars
759 0         0 $self->_print($ter_line,"\n");
760             }
761 45         60 $atom_line = "HETATM";
762             } else {
763 126         108 $atom_line = "ATOM ";
764             }
765 171         113 $last_record = $atom_line;
766 171         246 $atom_line .= sprintf("%5d ", $atom->serial);
767 171         218 $atom_serial = $atom->serial; # we need it for TER record
768 171         196 $atom_icode = $atom->icode;
769             # remember some stuff if next iteration needs writing TER
770 171         124 $prev_resname = $resname;
771 171         119 $prev_resnum = $resnum;
772 171         104 $prev_atomicode = $atom_icode;
773             # getting the name of the atom correct is subtrivial
774 171         215 my $atom_id = $atom->id;
775             # is pdb_atomname set, then use this (most probably set when
776             # reading in the PDB record)
777 171         209 my $pdb_atomname = $atom->pdb_atomname;
778 171 50       193 if( defined $pdb_atomname ) {
779 171         191 $atom_line .= sprintf("%-4s", $pdb_atomname);
780             } else {
781             # start (educated) guessing
782 0         0 my $element = $atom->element;
783 0 0 0     0 if( defined $element && $element ne "H") {
784             # element should be at first two positions (right justified)
785             # ie. Calcium should be "CA "
786             # C alpha should be " CA "
787 0 0       0 if( length($element) == 2 ) {
788 0         0 $atom_line .= sprintf("%-4s", $atom->id);
789             } else {
790 0         0 $atom_line .= sprintf(" %-3s", $atom->id);
791             }
792             } else { # old behaviour do a best guess
793 0 0       0 if ($atom->id =~ /^\dH/) { # H: four positions, left justified
    0          
794 0         0 $atom_line .= sprintf("%-4s", $atom->id);
795             } elsif (length($atom_id) == 4) {
796 0 0       0 if ($atom_id =~ /^(H\d\d)(\d)$/) { # turn H123 into 3H12
797 0         0 $atom_line .= $2.$1;
798             } else { # no more guesses, no more alternatives
799 0         0 $atom_line .= $atom_id;
800             }
801             } else { # if we get here and it is not correct let me know
802 0         0 $atom_line .= sprintf(" %-3s", $atom->id);
803             }
804             }
805             }
806             # we don't do alternate location at this moment
807 171         134 $atom_line .= " "; # 17
808 171         126 $atom_line .= sprintf("%3s",$resname); # 18-20
809 171         120 $atom_line .= " ".$chain_id; # 21, 22
810 171         146 $atom_line .= sprintf("%4d", $resnum); # 23-26
811 171 50       198 $atom_line .= $atom->icode ? $atom->icode : " "; # 27
812 171         110 $atom_line .= " "; # 28-30
813 171         217 $atom_line .= sprintf("%8.3f", $atom->x); # 31-38
814 171         238 $atom_line .= sprintf("%8.3f", $atom->y); # 39-46
815 171         256 $atom_line .= sprintf("%8.3f", $atom->z); # 47-54
816 171         243 $atom_line .= sprintf("%6.2f", $atom->occupancy); # 55-60
817 171         244 $atom_line .= sprintf("%6.2f", $atom->tempfactor); # 61-66
818 171         150 $atom_line .= " "; # 67-72
819 171 50       214 $atom_line .= $atom->segID ? # segID 73-76
820             sprintf("%-4s", $atom->segID) :
821             " ";
822 171 50       197 $atom_line .= $atom->element ?
823             sprintf("%2s", $atom->element) :
824             " ";
825 171 50       232 $atom_line .= $atom->charge ?
826             sprintf("%2s", $atom->charge) :
827             " ";
828              
829 171         252 $self->_print($atom_line,"\n");
830             }
831             }
832             # write out TER record
833 4 100       17 if ( $resname ne "HOH" ) {
834 3         4 my $ter_line = "TER ";
835 3         7 $ter_line .= sprintf("%5d", $atom_serial + 1);
836 3         4 $ter_line .= " ";
837 3         4 $ter_line .= sprintf("%3s ", $resname);
838 3         4 $ter_line .= $chain_id;
839 3         4 $ter_line .= sprintf("%4d", $resnum);
840 3 50       5 $ter_line .= $atom_icode ? $atom_icode : " "; # 27
841 3         9 $ter_line .= " " x (80 - length $ter_line); # extend to 80 chars
842 3         7 $self->_print($ter_line,"\n");
843             }
844             }
845 1 50       9 if ($struc->get_models > 1) { # we need ENDMDL
846 0         0 my $endmdl_line = "ENDMDL" . " " x 74;
847 0         0 $self->_print($endmdl_line, "\n");
848             }
849             } # for my $model
850              
851             # CONECT
852 1         7 my @sources = $struc->get_all_conect_source;
853 1         2 my ($conect_line,@conect, @bond, @hydbond, @saltbridge, $to, $type);
854 1         3 for my $source (@sources) {
855             # get all conect's
856 8         15 my @conect = $struc->conect($source);
857             # classify
858 8         9 for my $con (@conect) {
859 12         20 ($to, $type) = split /_/, $con;
860 12 50       18 if($type eq "bond") {
    0          
    0          
861 12         15 push @bond, $to;
862             } elsif($type eq "hydrogenbonded") {
863 0         0 push @hydbond, $to;
864             } elsif($type eq "saltbridged") {
865 0         0 push @saltbridge, $to;
866             } else {
867 0         0 $self->throw("type $type is unknown for conect");
868             }
869             }
870             # and write out CONECT lines as long as there is something
871             # in one of the arrays
872 8   66     17 while ( @bond || @hydbond || @saltbridge) {
      66        
873 8         7 my ($b, $hb, $sb);
874 8         12 $conect_line = "CONECT". sprintf("%5d", $source);
875 8         10 for my $k (0..3) {
876 32         25 $b = shift @bond;
877 32 100       39 $conect_line .= $b ? sprintf("%5d", $b) : " ";
878             }
879 8         9 for my $k (4..5) {
880 16         11 $hb = shift @hydbond;
881 16 50       26 $conect_line .= $hb ? sprintf("%5d", $hb) : " ";
882             }
883 8         5 $sb = shift @saltbridge;
884 8 50       10 $conect_line .= $sb ? sprintf("%5d", $sb) : " ";
885 8         10 for my $k (7..8) {
886 16         9 $hb = shift @hydbond;
887 16 50       19 $conect_line .= $hb ? sprintf("%5d", $hb) : " ";
888             }
889 8         8 $sb = shift @saltbridge;
890 8 50       7 $conect_line .= $sb ? sprintf("%5d", $sb) : " ";
891              
892 8         10 $conect_line .= " " x (80 - length($conect_line) );
893 8         16 $self->_print($conect_line, "\n");
894             }
895             }
896              
897             # MASTER line contains checksums, we should calculate them of course :)
898 1         4 my $master_line = "MASTER " . $struc->master;
899 1         4 $master_line .= " " x (80 - length($master_line) );
900 1         3 $self->_print($master_line, "\n");
901              
902 1         1 my $end_line = "END" . " " x 77;
903 1         3 $self->_print($end_line,"\n");
904              
905             }
906              
907             =head2 _filehandle
908              
909             Title : _filehandle
910             Usage : $obj->_filehandle($newval)
911             Function:
912             Example :
913             Returns : value of _filehandle
914             Args : newvalue (optional)
915              
916             =cut
917              
918             sub _filehandle{
919 0     0   0 my ($obj,$value) = @_;
920 0 0       0 if( defined $value) {
921 0         0 $obj->{'_filehandle'} = $value;
922             }
923 0         0 return $obj->{'_filehandle'};
924              
925             }
926              
927             =head2 _noatom
928              
929             Title : _noatom
930             Usage : $obj->_noatom($newval)
931             Function:
932             Example :
933             Returns : value of _noatom
934             Args : newvalue (optional)
935              
936              
937             =cut
938              
939             sub _noatom{
940 2     2   2 my ($obj,$value) = @_;
941 2 50       5 if( defined $value) {
942 0         0 $obj->{'_noatom'} = $value;
943             }
944 2         4 return $obj->{'_noatom'};
945              
946             }
947              
948             =head2 _noheader
949              
950             Title : _noheader
951             Usage : $obj->_noheader($newval)
952             Function:
953             Example :
954             Returns : value of _noheader
955             Args : newvalue (optional)
956              
957              
958             =cut
959              
960             sub _noheader{
961 2     2   2 my ($obj,$value) = @_;
962 2 50       4 if( defined $value) {
963 0         0 $obj->{'_noheader'} = $value;
964             }
965 2         4 return $obj->{'_noheader'};
966              
967             }
968              
969             =head2 _read_PDB_singlecontline
970              
971             Title : _read_PDB_singlecontline
972             Usage : $obj->_read_PDB_singlecontline($record, $fromto, $buffer))
973             Function: read single continued record from PDB
974             Returns : concatenated record entry (between $fromto columns)
975             Args : record, colunm delimiters, buffer
976              
977             =cut
978              
979             sub _read_PDB_singlecontline {
980 9     9   11 my ($self, $record, $fromto, $buffer) = @_;
981 9         9 my $concat_line;
982              
983 9         55 my ($begin, $end) = (split (/-/, $fromto));
984 9         9 my $unpack_string = "x8 a2 ";
985 9 50       19 if($begin == 12) { # one additional space
986 0         0 $unpack_string .= "x1 a59";
987             } else {
988 9         8 $unpack_string .= "a60";
989             }
990 9         8 $_ = $$buffer;
991 9   66     18 while (defined( $_ ||= $self->_readline ) ) {
992 25 100       117 if ( /^$record/ ) {
993 16         32 my($cont, $rol) = unpack $unpack_string, $_;
994 16 100 66     47 if($cont =~ /\d$/ && $begin == 11) { # continuation line
995             # and text normally at pos 11
996 7         10 $rol =~ s/^\s//; # strip leading space
997             }
998             ## no space (store litteraly) $concat_line .= $rol . " ";
999 16         22 $concat_line .= $rol;
1000             } else {
1001 9         9 last;
1002             }
1003              
1004 16         46 $_ = undef;
1005             }
1006 9         18 $concat_line =~ s/\s$//; # remove trailing space
1007 9         9 $$buffer = $_;
1008              
1009 9         15 return $concat_line;
1010             }
1011              
1012              
1013             =head2 _read_PDB_jrnl
1014              
1015             Title : _read_PDB_jrnl
1016             Usage : $obj->_read_PDB_jrnl($\buffer))
1017             Function: read jrnl record from PDB
1018             Returns : Bio::Annotation::Reference object
1019             Args :
1020              
1021             =cut
1022              
1023             sub _read_PDB_jrnl {
1024 2     2   3 my ($self, $buffer) = @_;
1025              
1026 2         2 $_ = $$buffer;
1027 2         3 my ($auth, $titl,$edit,$ref,$publ,$refn, $pmid, $doi);
1028 2   66     6 while (defined( $_ ||= $self->_readline )) {
1029 15 100       30 if (/^JRNL /) {
1030             # this code belgons in a seperate method (shared with
1031             # remark 1 parsing)
1032 13         33 my ($rec, $subr, $cont, $rol) = unpack "A6 x6 A4 A2 x1 A51", $_;
1033 13 100       30 $auth = $self->_concatenate_lines($auth,$rol) if ($subr eq "AUTH");
1034 13 100       20 $titl = $self->_concatenate_lines($titl,$rol) if ($subr eq "TITL");
1035 13 50       15 $edit = $self->_concatenate_lines($edit,$rol) if ($subr eq "EDIT");
1036 13 100       18 $ref = $self->_concatenate_lines($ref ,$rol) if ($subr eq "REF");
1037 13 50       15 $publ = $self->_concatenate_lines($publ,$rol) if ($subr eq "PUBL");
1038 13 100       16 $refn = $self->_concatenate_lines($refn,$rol) if ($subr eq "REFN");
1039 13 50       15 $pmid = $self->_concatenate_lines($pmid,$rol) if ($subr eq "PMID");
1040 13 50       14 $doi = $self->_concatenate_lines($doi,$rol) if ($subr eq "DOI");
1041             } else {
1042 2         3 last;
1043             }
1044              
1045 13         29 $_ = undef; # trigger reading of next line
1046             } # while
1047              
1048 2         2 $$buffer = $_;
1049 2         12 my $jrnl_ref = Bio::Annotation::Reference->new;
1050              
1051 2         5 $jrnl_ref->authors($auth);
1052 2         4 $jrnl_ref->title($titl);
1053 2         4 $jrnl_ref->location($ref);
1054 2         5 $jrnl_ref->publisher($publ);
1055 2         3 $jrnl_ref->editors($edit);
1056 2         4 $jrnl_ref->encoded_ref($refn);
1057 2         4 $jrnl_ref->pubmed($pmid);
1058 2         4 $jrnl_ref->doi($doi);
1059              
1060 2         5 return $jrnl_ref;
1061             } # sub _read_PDB_jrnl
1062              
1063              
1064             =head2 _read_PDB_remark_1
1065              
1066             Title : _read_PDB_remark_1
1067             Usage : $obj->_read_PDB_remark_1($\buffer))
1068             Function: read "remark 1" record from PDB
1069             Returns : array of Bio::Annotation::Reference objects
1070             Args :
1071              
1072             =cut
1073              
1074             sub _read_PDB_remark_1 {
1075 2     2   2 my ($self, $buffer) = @_;
1076              
1077 2         3 $_ = $$buffer;
1078 2         2 my ($auth, $titl,$edit,$ref,$publ,$refn,$refnum,$pmid, $doi);
1079 0         0 my @refs;
1080              
1081 2   66     9 while (defined( $_ ||= $self->_readline )) {
1082 59 100       111 if (/^REMARK 1 /) {
1083 57 100       88 if (/^REMARK 1\s+REFERENCE\s+(\d+)\s*/) {
1084 8         11 $refnum = $1;
1085 8 100       14 if ($refnum != 1) { # this is first line of a reference
1086 7         10 my $rref = Bio::Annotation::Reference->new;
1087 7         13 $rref->authors($auth);
1088 7         7 $rref->title($titl);
1089 7         8 $rref->location($ref);
1090 7         8 $rref->publisher($publ);
1091 7         9 $rref->editors($edit);
1092 7         12 $rref->encoded_ref($refn);
1093 7         9 $rref->pubmed($pmid);
1094 7         7 $rref->doi($doi);
1095 7         7 $auth = $titl = $edit = $ref = $publ = $refn = undef;
1096 7         7 push @refs, $rref;
1097             }
1098             } else {
1099             # this code belgons in a seperate method (shared with
1100             # remark 1 parsing)
1101 49         107 my ($rec, $subr, $cont, $rol) = unpack "A6 x6 A4 A2 x1 A51", $_;
1102 49 100       70 $auth = $self->_concatenate_lines($auth,$rol) if ($subr eq "AUTH");
1103 49 100       73 $titl = $self->_concatenate_lines($titl,$rol) if ($subr eq "TITL");
1104 49 50       55 $edit = $self->_concatenate_lines($edit,$rol) if ($subr eq "EDIT");
1105 49 100       61 $ref = $self->_concatenate_lines($ref ,$rol) if ($subr eq "REF");
1106 49 50       51 $publ = $self->_concatenate_lines($publ,$rol) if ($subr eq "PUBL");
1107 49 100       56 $refn = $self->_concatenate_lines($refn,$rol) if ($subr eq "REFN");
1108 49 50       52 $pmid = $self->_concatenate_lines($pmid,$rol) if ($subr eq "PMID");
1109 49 50       62 $doi = $self->_concatenate_lines($doi,$rol) if ($subr eq "DOI");
1110             }
1111             } else {
1112             # have we seen any reference at all (could be single REMARK 1 line
1113 2 100       5 if ( ! defined ($refnum) ) {
1114 1         2 last; # get out of while()
1115             }
1116              
1117             # create last reference
1118 1         2 my $rref = Bio::Annotation::Reference->new;
1119 1         2 $rref->authors($auth);
1120 1         2 $rref->title($titl);
1121 1         2 $rref->location($ref);
1122 1         2 $rref->publisher($publ);
1123 1         1 $rref->editors($edit);
1124 1         2 $rref->encoded_ref($refn);
1125 1         2 $rref->pubmed($pmid);
1126 1         2 $rref->doi($doi);
1127 1         1 push @refs, $rref;
1128 1         2 last;
1129             }
1130              
1131 57         111 $_ = undef; # trigger reading of next line
1132             } # while
1133              
1134 2         3 $$buffer = $_;
1135              
1136 2         5 return @refs;
1137             } # sub _read_PDB_jrnl
1138              
1139              
1140             =head2 _read_PDB_coordinate_section
1141              
1142             Title : _read_PDB_coordinate_section
1143             Usage : $obj->_read_PDB_coordinate_section($\buffer))
1144             Function: read one model from a PDB
1145             Returns : Bio::Structure::Model object
1146             Args :
1147              
1148             =cut
1149              
1150             sub _read_PDB_coordinate_section {
1151 2     2   4 my ($self, $buffer, $struc) = @_;
1152 2         2 my ($model_num, $chain_name, $residue_name, $atom_name); # to keep track of state
1153 2         2 $model_num = "";
1154 2         2 $chain_name = "";
1155 2         2 $residue_name = "";
1156 2         1 $atom_name = "";
1157              
1158 2         2 my $atom_unpack = "x6 a5 x1 a4 a1 a3 x1 a1 a4 a1 x3 a8 a8 a8 a6 a6 x6 a4 a2 a2";
1159 2         1 my $anisou_unpack = "x6 a5 x1 a4 a1 a3 x1 a1 a4 a1 x1 a7 a7 a7 a7 a7 a7 a4 a2 a2";
1160              
1161 2         13 my $model = Bio::Structure::Model->new;
1162 2         5 $model->id('default');
1163 2         3 my $noatom = $self->_noatom;
1164 2         2 my ($chain, $residue, $atom, $old);
1165 0         0 my (%_ch_in_model); # which chains are already in this model
1166              
1167 2         3 $_ = $$buffer;
1168 2   66     5 while (defined( $_ ||= $self->_readline )) {
1169             # start of a new model
1170 659 50       837 if (/^MODEL\s+(\d+)/) {
1171 0         0 $model_num = $1;
1172 0         0 $self->debug("_read_PDB_coor: parsing model $model_num\n");
1173 0         0 $model->id($model_num);
1174 0 0       0 if (/^MODEL\s+\d+\s+\S+/) { # old format (pre 2.1)
1175 0         0 $old = 1;
1176             }
1177             }
1178             # old hier ook setten XXX
1179             # ATOM lines, if first set chain
1180 659 100       1449 if (/^(ATOM |HETATM|SIGATM)/) {
1181 653         3466 my @line_elements = unpack $atom_unpack, $_;
1182 653         685 my $pdb_atomname = $line_elements[1]; # need to get this before removing spaces
1183 653         1058 for my $k (0 .. $#line_elements) {
1184 9795         10642 $line_elements[$k] =~ s/^\s+//; # remove leading space
1185 9795         7472 $line_elements[$k] =~ s/\s+$//; # remove trailing space
1186 9795 100       13758 $line_elements[$k] = undef if ($line_elements[$k] =~ /^\s*$/);
1187             }
1188 653         1036 my ($serial, $atomname, $altloc, $resname, $chainID, $resseq, $icode, $x, $y, $z,
1189             $occupancy, $tempfactor, $segID, $element, $charge) = @line_elements;
1190 653 100       851 $chainID = 'default' if ( !defined $chainID );
1191 653 100       780 if ($chainID ne $chain_name) { # possibly a new chain
1192             # fix for bug #1187
1193             # we can have ATOM/HETATM of an already defined chain (A B A B)
1194             # e.g. 1abm
1195              
1196 5 50       6 if (exists $_ch_in_model{$chainID} ) { # we have already seen this chain in this model
1197 0         0 $chain = $_ch_in_model{$chainID};
1198             } else { # we create a new chain
1199 5         19 $chain = Bio::Structure::Chain->new;
1200 5         10 $struc->add_chain($model,$chain);
1201 5         8 $chain->id($chainID);
1202 5         6 $_ch_in_model{$chainID} = $chain;
1203             }
1204 5         7 $chain_name = $chain->id;
1205             }
1206             #my $res_name_num = $resname."-".$resseq;
1207 653         718 my $res_name_num = $resname."-".$resseq;
1208 653 50       697 $res_name_num .= '.'.$icode if $icode;
1209 653 100       730 if ($res_name_num ne $residue_name) { # new residue
1210 157         286 $residue = Bio::Structure::Residue->new;
1211 157         296 $struc->add_residue($chain,$residue);
1212 157         213 $residue->id($res_name_num);
1213 157         112 $residue_name = $res_name_num;
1214 157         119 $atom_name = ""; # only needed inside a residue
1215             }
1216             # get out of here if we don't want the atom objects
1217 653 50       803 if ($noatom) {
1218 0         0 $_ = undef;
1219 0         0 next;
1220             }
1221             # alternative location: only take first one
1222 653 0 33     931 if ( $altloc && ($altloc =~ /\S+/) && ($atomname eq $atom_name) ) {
      33        
1223 0         0 $_ = undef; # trigger reading next line
1224 0         0 next;
1225             }
1226 653 50       1193 if (/^(ATOM |HETATM)/) { # ATOM / HETATM
1227 653         490 $atom_name = $atomname;
1228 653         1143 $atom = Bio::Structure::Atom->new;
1229 653         1103 $struc->add_atom($residue,$atom);
1230 653         1049 $atom->id($atomname);
1231 653         896 $atom->pdb_atomname($pdb_atomname); # store away PDB atomname for writing out
1232 653         806 $atom->serial($serial);
1233 653         726 $atom->icode($icode);
1234 653         787 $atom->x($x);
1235 653         756 $atom->y($y);
1236 653         669 $atom->z($z);
1237 653         785 $atom->occupancy($occupancy);
1238 653         671 $atom->tempfactor($tempfactor);
1239 653         816 $atom->segID($segID); # deprecated but used by people
1240 653 50       753 if (! $old ) {
1241 653         690 $atom->element($element);
1242 653         738 $atom->charge($charge);
1243             }
1244             }
1245             else { # SIGATM
1246 0         0 my $sigx = $x;
1247 0         0 my $sigy = $y;
1248 0         0 my $sigz = $z;
1249 0         0 my $sigocc = $occupancy;
1250 0         0 my $sigtemp = $tempfactor;
1251 0 0       0 if ($atom_name ne $atomname) { # something wrong with PDB file
1252 0         0 $self->throw("A SIGATM record should have the same $atomname as the previous record $atom_name\n");
1253             }
1254 0         0 $atom->sigx($sigx);
1255 0         0 $atom->sigy($sigy);
1256 0         0 $atom->sigz($sigz);
1257 0         0 $atom->sigocc($sigocc);
1258 0         0 $atom->sigtemp($sigtemp);
1259              
1260             }
1261             } # ATOM|HETARM|SIGATM
1262              
1263             # ANISOU | SIGUIJ lines
1264 659 50       1279 if (/^(ANISOU|SIGUIJ)/) {
1265 0 0       0 if ($noatom) {
1266 0         0 $_ = undef;
1267 0         0 next;
1268             }
1269 0         0 my @line_elements = unpack $anisou_unpack, $_;
1270 0         0 for my $k (0 .. $#line_elements) {
1271 0         0 $line_elements[$k] =~ s/^\s+//; # remove leading space
1272 0         0 $line_elements[$k] =~ s/\s+$//; # remove trailing space
1273 0 0       0 $line_elements[$k] = undef if ($line_elements[$k] =~ /^\s*$/);
1274             }
1275 0         0 my ($serial, $atomname, $altloc, $resname, $chainID, $resseq, $icode,
1276             $u11,$u22, $u33, $u12, $u13, $u23, $segID, $element, $charge) = @line_elements;
1277 0         0 $self->debug("read_PDB_coor: parsing ANISOU record: $serial $atomname\n");
1278 0 0 0     0 if ( $altloc && ($altloc =~ /\S+/) && ($atomname eq $atom_name) ) {
      0        
1279 0         0 $_ = undef;
1280 0         0 next;
1281             }
1282 0 0       0 if (/^ANISOU/) {
1283 0 0       0 if ($atom_name ne $atomname) { # something wrong with PDB file
1284 0         0 $self->throw("A ANISOU record should have the same $atomname as the previous record $atom_name\n");
1285             }
1286 0         0 $atom->aniso("u11",$u11);
1287 0         0 $atom->aniso("u22",$u22);
1288 0         0 $atom->aniso("u33",$u33);
1289 0         0 $atom->aniso("u12",$u12);
1290 0         0 $atom->aniso("u13",$u13);
1291 0         0 $atom->aniso("u23",$u23);
1292             }
1293             else { # SIGUIJ
1294 0 0       0 if ($atom_name ne $atomname) { # something wrong with PDB file
1295 0         0 $self->throw("A SIGUIJ record should have the same $atomname as the previous record $atom_name\n");
1296             }
1297             # could use different variable names, but hey ...
1298 0         0 $atom->aniso("sigu11",$u11);
1299 0         0 $atom->aniso("sigu22",$u22);
1300 0         0 $atom->aniso("sigu33",$u33);
1301 0         0 $atom->aniso("sigu12",$u12);
1302 0         0 $atom->aniso("sigu13",$u13);
1303 0         0 $atom->aniso("sigu23",$u23);
1304             }
1305             } # ANISOU | SIGUIJ
1306              
1307 659 100       842 if (/^TER /) {
1308 4         6 $_ = undef;
1309 4         13 next;
1310             }
1311              
1312 655 50       724 if (/^ENDMDL/) {
1313 0         0 $_ = $self->_readline;
1314 0         0 last;
1315             }
1316              
1317 655 100       903 if (/^(CONECT|MASTER)/) { # get out of here
1318             # current line is OK
1319 2         4 last;
1320             }
1321 653         1707 $_ = undef;
1322              
1323             } # while
1324              
1325 2         4 $$buffer = $_;
1326              
1327 2         9 return $model;
1328             } # _read_PDB_coordinate_section
1329              
1330              
1331             sub _write_PDB_simple_record {
1332 38     38   65 my ($self, @args) = @_;
1333 38         91 my ($name, $cont , $annotation, $rol, $string) =
1334             $self->_rearrange([qw(
1335             NAME
1336             CONT
1337             ANNOTATION
1338             ROL
1339             STRING
1340             )],
1341             @args);
1342 38 50 66     99 if (defined $string && defined $annotation) {
1343 0         0 $self->throw("you can only supply one of -annoation or -string");
1344             }
1345 38         25 my ($output_string, $ann_string, $t_string);
1346 38         109 my ($rol_begin, $rol_end) = $rol =~ /^(\d+)-(\d+)$/;
1347 38         59 my $rol_length = $rol_end - $rol_begin +1;
1348 38 100       52 if ($string) {
1349 4 100       6 if (length $string > $rol_length) {
1350             # we might need to split $string in multiple lines
1351 2         4 while (length $string > $rol_length) {
1352             # other option might be to go for a bunch of substr's
1353 3         26 my @c = split//,$string;
1354 3         3 my $t = $rol_length; # index into @c
1355 3         8 while ($c[$t] ne " ") { # find first space, going backwards
1356 21         40 $self->debug("c[t]: $c[$t] $t\n");
1357 21         18 $t--;
1358 21 50       36 if ($t == 0) { $self->throw("Found no space for $string\n"); }
  0         0  
1359             }
1360 3         8 $self->debug("t: $t rol_length: $rol_length\n");
1361 3         5 $ann_string .= substr($string, 0, $t);
1362 3         5 $self->debug("ann_string: $ann_string\n");
1363 3         4 $ann_string .= " " x ($rol_length - $t );
1364 3         46 $string = substr($string, $t+1);
1365 3         8 $string =~ s/^\s+//;
1366 3         9 $self->debug("ann_string: $ann_string~~\nstring: $string~~\n");
1367             }
1368 2         3 $ann_string .= $string;
1369             } else {
1370 2         3 $ann_string = $string;
1371             }
1372             } else {
1373 34         51 $ann_string = $annotation->as_text;
1374 34         85 $ann_string =~ s/^Value: //;
1375             }
1376             # ann_string contains the thing to write out, writing out happens below
1377 38         33 my $ann_length = length $ann_string;
1378              
1379 38         97 $self->debug("ann_string: $ann_string\n");
1380 38 100       49 if ($cont) {
1381 10         27 my ($c_begin, $c_end) = $cont =~ /^(\d+)-(\d+)$/;
1382 10 100       17 if ( $ann_length > $rol_length ) { # we need to continuation lines
1383 6         4 my $first_line = 1;
1384 6         5 my $cont_number = 2;
1385 6         6 my $out_line;
1386 6         2 my $num_pos = $rol_length;
1387 6         5 my $i = 0;
1388 6         11 while( $i < $ann_length ) {
1389 15         17 $t_string = substr($ann_string, $i, $num_pos);
1390 15         29 $self->debug("t_string: $t_string~~$i $num_pos\n");
1391 15 100       18 if ($first_line) {
1392 6         14 $out_line = $name . " " x ($rol_begin - $c_begin) . $t_string;
1393 6         10 $out_line .= " " x (80 - length($out_line) ) . "\n";
1394 6         4 $first_line = 0;
1395 6         5 $output_string = $out_line;
1396 6         5 $i += $num_pos; # first do counter
1397 6 100       11 if ($rol_begin - $c_end == 1) { # next line one character less
1398 4         7 $num_pos--;
1399             }
1400             } else {
1401 9         16 $out_line = $name . sprintf("%2d",$cont_number);
1402             # a space after continuation number
1403 9 100       11 if ($rol_begin - $c_end == 1) { # one space after cont number
1404 6         4 $out_line .= " ";
1405 6         4 $out_line .= $t_string;
1406             } else {
1407 3         6 $out_line .= " " x ($rol_begin - $c_end - 1) . $t_string;
1408             }
1409 9         12 $out_line .= " " x (80 -length($out_line) ) . "\n";
1410 9         7 $cont_number++;
1411 9         9 $output_string .= $out_line;
1412 9         14 $i += $num_pos;
1413             }
1414             }
1415             } else { # no continuation
1416 4         4 my $spaces = $rol_begin - $c_begin; # number of spaces need to insert
1417 4         7 $output_string = $name . " " x $spaces . $ann_string;
1418 4         6 $output_string .= " " x (80 - length($output_string) );
1419             }
1420             } else { # no contintuation lines
1421 28 50       40 if ($ann_length < $rol_length) {
1422 0         0 $output_string = $name . $ann_string;
1423 0         0 $output_string .= " " x (80 - length($output_string) );
1424             } else {
1425 28         39 for (my $i = 0; $i < $ann_length; $i += $rol_length) {
1426 249         131 my $out_line;
1427 249         171 $t_string = substr($ann_string, $i, $rol_length);
1428 249         198 $out_line = $name . $t_string;
1429 249         185 $out_line .= " " x (80 -length($out_line) ) . "\n";
1430 249         335 $output_string .= $out_line;
1431             }
1432             }
1433             }
1434 38         90 $output_string =~ s/\n$//; # remove trailing newline
1435 38         93 $self->_print("$output_string\n");
1436              
1437             }
1438              
1439             sub _write_PDB_remark_record {
1440 13     13   12 my ($self, $struc, $remark_num) = @_;
1441 13         24 my ($ann) = $struc->annotation->get_Annotations("remark_$remark_num");
1442 13         26 my $name = sprintf("REMARK %3d ",$remark_num);
1443 13         28 $self->_write_PDB_simple_record(-name => $name, -annotation => $ann, -rol => "12-70");
1444             }
1445              
1446             1;