File Coverage

Bio/Structure/IO/pdb.pm
Criterion Covered Total %
statement 595 751 79.2
branch 231 398 58.0
condition 79 165 47.8
subroutine 17 18 94.4
pod 2 2 100.0
total 924 1334 69.2


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Structure::IO::pdb
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Kris Boulez
7             #
8             # Copyright 2001, 2002 Kris Boulez
9             #
10             # Framework is a copy of Bio::SeqIO::embl.pm
11             #
12             # You may distribute this module under the same terms as perl itself
13              
14             # POD documentation - main docs before the code
15              
16             =head1 NAME
17              
18             Bio::Structure::IO::pdb - PDB input/output stream
19              
20             =head1 SYNOPSIS
21              
22             It is probably best not to use this object directly, but
23             rather go through the Bio::Structure::IO handler system. Go:
24              
25             $stream = Bio::Structure::IO->new(-file => $filename,
26             -format => 'PDB');
27              
28             while (my $structure = $stream->next_structure) {
29             # do something with $structure
30             }
31              
32             =head1 DESCRIPTION
33              
34             This object can transform Bio::Structure objects to and from PDB flat
35             file databases. The working is similar to that of the Bio::SeqIO handlers.
36              
37             =head1 FEEDBACK
38              
39             =head2 Mailing Lists
40              
41             User feedback is an integral part of the evolution of this and other
42             Bioperl modules. Send your comments and suggestions preferably to one
43             of the Bioperl mailing lists. Your participation is much appreciated.
44              
45             bioperl-l@bioperl.org - General discussion
46             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
47              
48             =head2 Support
49              
50             Please direct usage questions or support issues to the mailing list:
51              
52             I
53              
54             rather than to the module maintainer directly. Many experienced and
55             reponsive experts will be able look at the problem and quickly
56             address it. Please include a thorough description of the problem
57             with code and data examples if at all possible.
58              
59             =head2 Reporting Bugs
60              
61             Report bugs to the Bioperl bug tracking system to help us keep track
62             the bugs and their resolution. Bug reports can be submitted via the
63             web:
64              
65             https://github.com/bioperl/bioperl-live/issues
66              
67             =head1 AUTHOR - Kris Boulez
68              
69             Email kris.boulez@algonomics.com
70              
71             =head1 APPENDIX
72              
73             The rest of the documentation details each of the object methods.
74             Internal methods are usually preceded with a _
75              
76             =cut
77              
78              
79             # Let the code begin...
80              
81              
82             package Bio::Structure::IO::pdb;
83 1     1   5 use strict;
  1         2  
  1         24  
84 1     1   266 use Bio::Structure::Entry;
  1         1  
  1         25  
85             #use Bio::Structure::Model;
86             #use Bio::Structure::Chain;
87             #use Bio::Structure::Residue;
88 1     1   254 use Bio::Structure::Atom;
  1         2  
  1         21  
89 1     1   306 use Bio::SeqFeature::Generic;
  1         2  
  1         25  
90 1     1   268 use Bio::Annotation::Reference;
  1         2  
  1         26  
91              
92 1     1   5 use base qw(Bio::Structure::IO);
  1         2  
  1         6210  
93              
94             sub _initialize {
95 3     3   8 my($self,@args) = @_;
96              
97 3         11 $self->SUPER::_initialize(@args);
98              
99 3         10 my ($noheader, $noatom) =
100             $self->_rearrange([qw(
101             NOHEADER
102             NOATOM
103             )],
104             @args);
105 3 50       7 $noheader && $self->_noheader($noheader);
106 3 50       13 $noatom && $self->_noatom($noatom);
107             }
108              
109              
110             =head2 next_structure;
111              
112             Title : next_structure
113             Usage : $struc = $stream->next_structure()
114             Function: returns the next structure in the stream
115             Returns : Bio::Structure object
116             Args :
117              
118             =cut
119              
120             sub next_structure {
121 2     2 1 7 my ($self,@args) = @_;
122 2         2 my ($line);
123 2         4 my ($obslte, $title, $caveat, $compnd, $source, $keywds,
124             $expdta, $author, %revdat, $revdat, $sprsde, $jrnl, %remark, $dbref,
125             $turn, $ssbond, $link, $hydbnd, $sltbrg, $cispep,
126             $site, $cryst1, $tvect,);
127 2         12 my $struc = Bio::Structure::Entry->new(-id => 'created from pdb.pm');
128 2         6 my $all_headers = ( !$self->_noheader ); # we'll parse all headers and store as annotation
129 2         2 my %header; # stores all header RECORDs an is stored as annotations when ATOM is reached
130              
131              
132 2         9 $line = $self->_readline; # This needs to be before the first eof() test
133              
134 2 50       4 if( !defined $line ) {
135 0         0 return; # no throws - end of file
136             }
137              
138 2 50       8 if( $line =~ /^\s+$/ ) {
139 0         0 while( defined ($line = $self->_readline) ) {
140 0 0       0 $line =~/\S/ && last;
141             }
142             }
143 2 50       6 if( !defined $line ) {
144 0         0 return; # end of file
145             }
146 2 50       6 $line =~ /^HEADER\s+\S+/ || $self->throw("PDB stream with no HEADER. Not pdb in my book");
147 2         9 my($header_line) = unpack "x10 a56", $line;
148 2         4 $header{'header'} = $header_line;
149 2         6 my($class, $depdate, $idcode) = unpack "x10 a40 a9 x3 a4", $line;
150 2         8 $idcode =~ s/^\s*(\S+)\s*$/$1/;
151 2         6 $struc->id($idcode);
152 2         11 $self->debug("PBD c $class d $depdate id $idcode\n"); # XXX KB
153              
154 2         2 my $buffer = $line;
155              
156             BEFORE_COORDINATES :
157 2         9 until( !defined $buffer ) {
158 292         305 $_ = $buffer;
159              
160             # Exit at start of coordinate section
161 292 100       540 last if /^(MODEL|ATOM|HETATM)/;
162              
163             # OBSLTE line(s)
164 290 50 33     402 if (/^OBSLTE / && $all_headers) {
165 0         0 $obslte = $self->_read_PDB_singlecontline("OBSLTE","12-70",\$buffer);
166 0         0 $header{'obslte'} = $obslte;
167             }
168              
169             # TITLE line(s)
170 290 100 66     375 if (/^TITLE / && $all_headers) {
171 1         4 $title = $self->_read_PDB_singlecontline("TITLE","11-70",\$buffer);
172 1         12 $header{'title'} = $title;
173             }
174              
175             # CAVEAT line(s)
176 290 50 33     404 if (/^CAVEAT / && $all_headers) {
177 0         0 $caveat = $self->_read_PDB_singlecontline("CAVEAT","12-70",\$buffer);
178 0         0 $header{'caveat'} = $caveat;
179             }
180              
181             # COMPND line(s)
182 290 100 66     392 if (/^COMPND / && $all_headers) {
183 2         6 $compnd = $self->_read_PDB_singlecontline("COMPND","11-70",\$buffer);
184 2         5 $header{'compnd'} = $compnd;
185 2         18 $self->debug("get COMPND $compnd\n");
186             }
187              
188             # SOURCE line(s)
189 290 100 66     359 if (/^SOURCE / && $all_headers) {
190 2         7 $source = $self->_read_PDB_singlecontline("SOURCE","11-70",\$buffer);
191 2         5 $header{'source'} = $source;
192             }
193              
194             # KEYWDS line(s)
195 290 100 66     400 if (/^KEYWDS / && $all_headers) {
196 1         5 $keywds = $self->_read_PDB_singlecontline("KEYWDS","11-70",\$buffer);
197 1         2 $header{'keywds'} = $keywds;
198             }
199              
200             # EXPDTA line(s)
201 290 100 66     370 if (/^EXPDTA / && $all_headers) {
202 1         3 $expdta = $self->_read_PDB_singlecontline("EXPDTA","11-70",\$buffer);
203 1         2 $header{'expdta'} = $expdta;
204             }
205              
206             # AUTHOR line(s)
207 290 100 66     410 if (/^AUTHOR / && $all_headers) {
208 2         4 $author = $self->_read_PDB_singlecontline("AUTHOR","11-70",\$buffer);
209 2         4 $header{'author'} = $author;
210             }
211              
212             # REVDAT line(s)
213             # a bit more elaborate as we also store the modification number
214 290 100 66     385 if (/^REVDAT / && $all_headers) {
215             ##my($modnum,$rol) = unpack "x7 A3 x3 A53", $_;
216             ##$modnum =~ s/\s+//; # remove spaces
217             ##$revdat{$modnum} .= $rol;
218 3         7 my ($rol) = unpack "x7 a59", $_;
219 3         7 $revdat .= $rol;
220 3         3 $header{'revdat'} = $revdat;
221             }
222              
223             # SPRSDE line(s)
224 290 50 33     394 if (/^SPRSDE / && $all_headers) {
225 0         0 $sprsde = $self->_read_PDB_singlecontline("SPRSDE","12-70",\$buffer);
226 0         0 $header{'sprsde'} = $sprsde;
227             }
228              
229             # jRNL line(s)
230 290 100 66     398 if (/^JRNL / && $all_headers) {
231 2         6 $jrnl = $self->_read_PDB_jrnl(\$buffer);
232 2         5 $struc->annotation->add_Annotation('reference',$jrnl);
233 2         4 $header{'jrnl'} = 1; # when writing out, we need a way to check there was a JRNL record (not mandatory)
234             }
235              
236             # REMARK line(s)
237             # we only parse the "REMARK 1" lines (additional references)
238             # thre rest is stored in %remark (indexed on remarkNum) (pack does space-padding)
239 290 100 66     869 if (/^REMARK\s+(\d+)\s*/ && $all_headers) {
240 240         360 my $remark_num = $1;
241 240 100       317 if ($remark_num == 1) {
242 2         5 my @refs = $self->_read_PDB_remark_1(\$buffer);
243             # How can we find the primary reference when writing (JRNL record) XXX KB
244 2         3 foreach my $ref (@refs) {
245 8         13 $struc->annotation->add_Annotation('reference', $ref);
246             }
247             # $_ still holds the REMARK_1 line, $buffer now contains the first non
248             # REMARK_1 line. We need to parse it in this pass (so no else block)
249 2         3 $_ = $buffer;
250             }
251             # for the moment I don't see a better solution (other then using goto)
252 240 50       474 if (/^REMARK\s+(\d+)\s*/) {
253 240         265 my $r_num = $1;
254 240 50       320 if ($r_num != 1) { # other remarks, we store literlly at the moment
255 240         450 my ($rol) = unpack "x11 a59", $_;
256 240         503 $remark{$r_num} .= $rol;
257             }
258             }
259             } # REMARK
260              
261             # DBREF line(s)
262             # references to sequences in other databases
263             # we store as 'dblink' annotations and whole line as simple annotation (round-trip)
264 290 100 66     441 if (/^DBREF / && $all_headers) {
265 3         8 my ($rol) = unpack "x7 a61", $_;
266 3         7 $dbref .= $rol;
267 3         4 $header{'dbref'} = $dbref;
268 3         7 my ($db, $acc) = unpack "x26 a6 x1 a8", $_;
269 3         10 $db =~ s/\s*$//;
270 3         10 $acc =~ s/\s*$//;
271 3         10 my $link = Bio::Annotation::DBLink->new;
272 3         8 $link->database($db);
273 3         6 $link->primary_id($acc);
274 3         7 $struc->annotation->add_Annotation('dblink', $link);
275             } # DBREF
276              
277             # SEQADV line(s)
278 290 50 33     424 if (/^SEQADV / && $all_headers) {
279 0         0 my ($rol) = unpack "x7 a63", $_;
280 0         0 $header{'seqadv'} .= $rol;
281             } # SEQADV
282              
283             # SEQRES line(s)
284             # this is (I think) the sequence of macromolecule that was analysed
285             # this will be returned when doing $struc->seq
286 290 100 66     381 if (/^SEQRES / && $all_headers) {
287 8         19 my ($rol) = unpack "x8 a62", $_;
288 8         16 $header{'seqres'} .= $rol;
289             } # SEQRES
290              
291             # MODRES line(s)
292 290 50 33     375 if (/^MODRES / && $all_headers) {
293 0         0 my ($rol) = unpack "x7 a63", $_;
294 0         0 $header{'modres'} .= $rol;
295             } # MODRES
296              
297             # HET line(s)
298 290 100 66     360 if (/^HET / && $all_headers) {
299 3         7 my ($rol) = unpack "x7 a63", $_;
300 3         7 $header{'het'} .= $rol;
301             } # HET
302              
303             # HETNAM line(s)
304 290 100 66     394 if (/^HETNAM / && $all_headers) {
305 1         3 my ($rol) = unpack "x8 a62", $_;
306 1         2 $header{'hetnam'} .= $rol;
307             } # HETNAM
308              
309             # HETSYN line(s)
310 290 50 33     385 if (/^HETSYN / && $all_headers) {
311 0         0 my ($rol) = unpack "x8 a62", $_;
312 0         0 $header{'hetsyn'} .= $rol;
313             } # HETSYN
314              
315             # FORMUL line(s)
316 290 100 66     369 if (/^FORMUL / && $all_headers) {
317 4         10 my ($rol) = unpack "x8 a62", $_;
318 4         14 $header{'formul'} .= $rol;
319             } # FORMUL
320              
321             # HELIX line(s)
322             # store as specific object ??
323 290 100 66     443 if (/^HELIX / && $all_headers) {
324 2         4 my ($rol) = unpack "x7 a69", $_;
325 2         4 $header{'helix'} .= $rol;
326             } # HELIX
327              
328             # SHEET line(s)
329             # store as specific object ??
330 290 100 66     400 if (/^SHEET / && $all_headers) {
331 3         7 my ($rol) = unpack "x7 a63", $_;
332 3         7 $header{'sheet'} .= $rol;
333             } # SHEET
334              
335             # TURN line(s)
336             # store as specific object ??
337 290 50 33     361 if (/^TURN / && $all_headers) {
338 0         0 my ($rol) = unpack "x7 a63", $_;
339 0         0 $turn .= $rol;
340 0         0 $header{'turn'} = $turn;
341             } # TURN
342              
343             # SSBOND line(s)
344             # store in connection-like object (see parsing of CONECT record)
345 290 100 66     392 if (/^SSBOND / && $all_headers) {
346 3         7 my ($rol) = unpack "x7 a65", $_;
347 3         6 $ssbond .= $rol;
348 3         4 $header{'ssbond'} = $ssbond;
349             } # SSBOND
350              
351             # LINK
352             # store like SSBOND ?
353 290 100 66     403 if (/^LINK / && $all_headers) {
354 3         7 my ($rol) = unpack "x12 a60", $_;
355 3         5 $link .= $rol;
356 3         5 $header{'link'} = $link;
357             } # LINK
358              
359             # HYDBND
360             # store like SSBOND
361 290 50 33     417 if (/^HYDBND / && $all_headers) {
362 0         0 my ($rol) = unpack "x12 a60", $_;
363 0         0 $hydbnd .= $rol;
364 0         0 $header{'hydbnd'} = $hydbnd;
365             } # HYDBND
366              
367             # SLTBRG
368             # store like SSBOND ?
369 290 50 33     374 if (/^SLTBRG / && $all_headers) {
370 0         0 my ($rol) = unpack "x12 a60",$_;
371 0         0 $sltbrg .= $rol;
372 0         0 $header{'sltbrg'} = $sltbrg;
373             } # SLTBRG
374              
375             # CISPEP
376             # store like SSBOND ?
377 290 50 33     404 if (/^CISPEP / && $all_headers) {
378 0         0 my ($rol) = unpack "x7 a52", $_;
379 0         0 $cispep .= $rol;
380 0         0 $header{'cispep'} = $cispep;
381             }
382              
383             # SITE line(s)
384 290 50 33     360 if (/^SITE / && $all_headers) {
385 0         0 my ($rol) = unpack "x7 a54", $_;
386 0         0 $site .= $rol;
387 0         0 $header{'site'} = $site;
388             } # SITE
389              
390             # CRYST1 line
391             # store in some crystallographic subobject ?
392 290 100 66     367 if (/^CRYST1/ && $all_headers) {
393 2         5 my ($rol) = unpack "x6 a64", $_;
394 2         3 $cryst1 .= $rol;
395 2         4 $header{'cryst1'} = $cryst1;
396             } # CRYST1
397              
398             # ORIGXn line(s) (n=1,2,3)
399 290 100 66     388 if (/^(ORIGX\d) / && $all_headers) {
400 6         16 my $origxn = lc($1);
401 6         18 my ($rol) = unpack "x10 a45", $_;
402 6         19 $header{$origxn} .= $rol;
403             } # ORIGXn
404              
405             # SCALEn line(s) (n=1,2,3)
406 290 100 66     382 if (/^(SCALE\d) / && $all_headers) {
407 6         11 my $scalen = lc($1);
408 6         13 my ($rol) = unpack "x10 a45", $_;
409 6         16 $header{$scalen} .= $rol;
410             } # SCALEn
411              
412             # MTRIXn line(s) (n=1,2,3)
413 290 50 33     370 if (/^(MTRIX\d) / && $all_headers) {
414 0         0 my $mtrixn = lc($1);
415 0         0 my ($rol) = unpack "x7 a53", $_;
416 0         0 $header{$mtrixn} .= $rol;
417             } # MTRIXn
418              
419             # TVECT line(s)
420 290 100 66     348 if (/^TVECT / && $all_headers) {
421 1         3 my ($rol) = unpack "x7 a63", $_;
422 1         1 $tvect .= $rol;
423 1         2 $header{'tvect'} = $tvect;
424             }
425              
426             # Get next line.
427 290         456 $buffer = $self->_readline;
428             }
429              
430             # store %header entries a annotations
431 2 50       4 if (%header) {
432 2         9 for my $record (keys %header) {
433 42         63 my $sim = Bio::Annotation::SimpleValue->new();
434 42         75 $sim->value($header{$record});
435 42         58 $struc->annotation->add_Annotation($record, $sim);
436             }
437             }
438             # store %remark entries as annotations
439 2 50       5 if (%remark) {
440 2         6 for my $remark_num (keys %remark) {
441 17         25 my $sim = Bio::Annotation::SimpleValue->new();
442 17         59 $sim->value($remark{$remark_num});
443 17         24 $struc->annotation->add_Annotation("remark_$remark_num", $sim);
444             }
445             }
446              
447             # Coordinate section, the real meat
448             #
449             # $_ contains a line beginning with (ATOM|MODEL)
450              
451 2         5 $buffer = $_;
452              
453              
454 2 50 33     16 if (defined($buffer) && $buffer =~ /^(ATOM |MODEL |HETATM)/ ) { # can you have an entry without ATOM ?
455 2         4 while( defined ($buffer) ) { # (yes : 1a7z )
456             # read in one model at a time
457 2         5 my $model = $self->_read_PDB_coordinate_section(\$buffer, $struc);
458             # add this to $struc
459 2         8 $struc->add_model($struc, $model);
460              
461 2 50 33     13 if ($buffer && $buffer !~ /^MODEL /) { # if we get here we have multiple MODELs
462 2         4 last;
463             }
464             }
465             }
466             else {
467 0         0 $self->throw("Could not find a coordinate section in this record\n");
468             }
469              
470              
471 2         6 until( !defined $buffer ) {
472 23         23 $_ = $buffer;
473              
474             # CONNECT records
475 23 100       53 if (/^CONECT/) {
476             # do not differentiate between different type of connect (column dependant)
477 19         20 my $conect_unpack = "x6 a5 a5 a5 a5 a5 a5 a5 a5 a5 a5 a5";
478 19         62 my (@conect) = unpack $conect_unpack, $_;
479 19         34 for my $k (0 .. $#conect) {
480 209         578 $conect[$k] =~ s/\s//g;
481             }
482 19         23 my $source = shift @conect;
483 19         19 my $type;
484 19         22 for my $k (0 .. 9) {
485 190 100       284 next unless ($conect[$k] =~ /^\d+$/);
486             # 0..3 bond
487 32 50 0     48 if( $k <= 3 ) {
    0 0        
    0 0        
      0        
488 32         26 $type = "bond";
489             }
490             # 4..5,7..8 hydrogen bonded
491             elsif( ($k >= 4 && $k <= 5) || ($k >= 7 && $k <= 8) ) {
492 0         0 $type = "hydrogen";
493             }
494             # 6, 9 salt bridged
495             elsif( $k == 6 || $k == 9 ) {
496 0         0 $type = "saltbridged";
497             } else {
498 0         0 $self->throw("k has impossible value ($k), check brain");
499             }
500 32         56 $struc->conect($source, $conect[$k], $type);
501             }
502             }
503              
504             # MASTER record
505 23 100       39 if (/^MASTER /) {
506             # the numbers in here a checksums, we should use them :)
507 2         6 my ($rol) = unpack "x10 a60", $_;
508 2         7 $struc->master($rol);
509             }
510              
511 23 100       32 if (/^END/) {
512             # this it the end ...
513             }
514              
515 23         34 $buffer = $self->_readline;
516             }
517              
518              
519 2         40 return $struc;
520             }
521              
522             =head2 write_structure
523              
524             Title : write_structure
525             Usage : $stream->write_structure($struc)
526             Function: writes the $struc object (must be a Bio::Structure) to the stream
527             Returns : 1 for success and 0 for error
528             Args : Bio::Structure object
529              
530             =cut
531              
532             sub write_structure {
533 1     1 1 6 my ($self, $struc) = @_;
534 1 50       3 if( !defined $struc ) {
535 0         0 $self->throw("Attempting to write with no structure!");
536             }
537              
538 1 50 33     8 if( ! ref $struc || ! $struc->isa('Bio::Structure::StructureI') ) {
539 0         0 $self->throw(" $struc is not a StructureI compliant module.");
540             }
541 1         2 my ($ann, $string, $output_string, $key);
542             # HEADER
543 1         4 ($ann) = $struc->annotation->get_Annotations("header");
544 1 50       4 if (defined $ann) {
545 1         3 $string = $ann->as_text;
546 1         4 $string =~ s/^Value: //;
547 1         5 $output_string = pack ("A10 A56", "HEADER", $string);
548             } else { # not read in via read_structure, create HEADER line
549 0         0 my $id = $struc->id;
550 0 0       0 if (!$id) {
551 0         0 $id = "UNK1";
552             }
553 0 0       0 if (length($id) > 4) {
554 0         0 $id = substr($id,0,4);
555             }
556 0         0 my $classification = "DEFAULT CLASSIFICATION";
557 0         0 my $dep_date = "24-JAN-70";
558 0         0 $output_string = pack ("A10 A40 A12 A4", "HEADER", $classification, $dep_date, $id);
559             }
560 1         5 $output_string .= " " x (80 - length($output_string) );
561 1         13 $self->_print("$output_string\n");
562              
563 1         1 my (%header);
564 1         3 for $key ($struc->annotation->get_all_annotation_keys) {
565 38         43 $header{$key} = 1;;
566             }
567              
568 1 50       4 exists $header{'obslte'} && $self->_write_PDB_simple_record(-name => "OBSLTE ", -cont => "9-10",
569             -annotation => $struc->annotation->get_Annotations("obslte"), -rol => "11-70");
570              
571 1 50       4 exists $header{'title'} && $self->_write_PDB_simple_record(-name => "TITLE ", -cont => "9-10",
572             -annotation => $struc->annotation->get_Annotations("title"), -rol => "11-70");
573              
574 1 50       4 exists $header{'caveat'} && $self->_write_PDB_simple_record(-name => "CAVEAT ", -cont => "9-10",
575             -annotation => $struc->annotation->get_Annotations("caveat"), -rol => "12-70");
576              
577 1 50       4 exists $header{'compnd'} && $self->_write_PDB_simple_record(-name => "COMPND ", -cont => "9-10",
578             -annotation => $struc->annotation->get_Annotations("compnd"), -rol => "11-70");
579              
580 1 50       5 exists $header{'source'} && $self->_write_PDB_simple_record(-name => "SOURCE ", -cont => "9-10",
581             -annotation => $struc->annotation->get_Annotations("source"), -rol => "11-70");
582              
583 1 50       6 exists $header{'keywds'} && $self->_write_PDB_simple_record(-name => "KEYWDS ", -cont => "9-10",
584             -annotation => $struc->annotation->get_Annotations("keywds"), -rol => "11-70");
585              
586 1 50       5 exists $header{'expdta'} && $self->_write_PDB_simple_record(-name => "EXPDTA ", -cont => "9-10",
587             -annotation => $struc->annotation->get_Annotations("expdta"), -rol => "11-70");
588              
589 1 50       5 exists $header{'author'} && $self->_write_PDB_simple_record(-name => "AUTHOR ", -cont => "9-10",
590             -annotation => $struc->annotation->get_Annotations("author"), -rol => "11-70");
591              
592 1 50       5 exists $header{'revdat'} && $self->_write_PDB_simple_record(-name => "REVDAT ",
593             -annotation => $struc->annotation->get_Annotations("revdat"), -rol => "8-66");
594              
595 1 50       6 exists $header{'sprsde'} && $self->_write_PDB_simple_record(-name => "SPRSDE ", -cont => "9-10",
596             -annotation => $struc->annotation->get_Annotations("sprsde"), -rol => "12-70");
597              
598             # JRNL en REMARK 1
599 1         2 my ($jrnl_done, $remark_1_counter);
600 1 50       4 if ( !exists $header{'jrnl'} ) {
601 0         0 $jrnl_done = 1;
602             }
603 1         4 foreach my $ref ($struc->annotation->get_Annotations('reference') ) {
604 1 50       3 if( !$jrnl_done ) { # JRNL record
605 1 50       4 $ref->authors && $self->_write_PDB_simple_record(-name => "JRNL AUTH",
606             -cont => "17-18", -rol => "20-70", -string => $ref->authors );
607 1 50       4 $ref->title && $self->_write_PDB_simple_record(-name => "JRNL TITL",
608             -cont => "17-18", -rol => "20-70", -string => $ref->title );
609 1 50       3 $ref->editors && $self->_write_PDB_simple_record(-name => "JRNL EDIT",
610             -cont => "17-18", -rol => "20-70", -string => $ref->editors );
611 1 50       3 $ref->location && $self->_write_PDB_simple_record(-name => "JRNL REF ",
612             -cont => "17-18", -rol => "20-70", -string => $ref->location );
613 1 50       3 $ref->editors && $self->_write_PDB_simple_record(-name => "JRNL EDIT",
614             -cont => "17-18", -rol => "20-70", -string => $ref->editors );
615 1 50       3 $ref->encoded_ref && $self->_write_PDB_simple_record(-name => "JRNL REFN",
616             -cont => "17-18", -rol => "20-70", -string => $ref->encoded_ref );
617 1         3 $jrnl_done = 1;
618             } else { # REMARK 1
619 0 0       0 if (!$remark_1_counter) { # header line
620 0         0 my $remark_1_header_line = "REMARK 1" . " " x 70;
621 0         0 $self->_print("$remark_1_header_line\n");
622 0         0 $remark_1_counter = 1;
623             }
624             # per reference header
625 0         0 my $rem_line = "REMARK 1 REFERENCE " . $remark_1_counter;
626 0         0 $rem_line .= " " x (80 - length($rem_line) );
627 0         0 $self->_print($rem_line,"\n");
628 0 0       0 $ref->authors && $self->_write_PDB_simple_record(-name => "REMARK 1 AUTH",
629             -cont => "17-18", -rol => "20-70", -string => $ref->authors );
630 0 0       0 $ref->title && $self->_write_PDB_simple_record(-name => "REMARK 1 TITL",
631             -cont => "17-18", -rol => "20-70", -string => $ref->title );
632 0 0       0 $ref->editors && $self->_write_PDB_simple_record(-name => "REMARK 1 EDIT",
633             -cont => "17-18", -rol => "20-70", -string => $ref->editors );
634 0 0       0 $ref->location && $self->_write_PDB_simple_record(-name => "REMARK 1 REF ",
635             -cont => "17-18", -rol => "20-70", -string => $ref->location );
636 0 0       0 $ref->editors && $self->_write_PDB_simple_record(-name => "REMARK 1 EDIT",
637             -cont => "17-18", -rol => "20-70", -string => $ref->editors );
638 0 0       0 $ref->encoded_ref && $self->_write_PDB_simple_record(-name => "REMARK 1 REFN",
639             -cont => "17-18", -rol => "20-70", -string => $ref->encoded_ref );
640 0         0 $remark_1_counter++;
641             }
642             }
643 1 50       3 if (! defined $remark_1_counter ) { # no remark 1 record written yet
644 1         2 my $remark_1_header_line = "REMARK 1" . " " x 70;
645 1         3 $self->_print("$remark_1_header_line\n"); # write dummy (we need this line)
646             }
647              
648             # REMARK's (not 1 at the moment, references)
649 1         2 my (%remarks, $remark_num);
650 1         7 for $key (keys %header) {
651 38 100       50 next unless ($key =~ /^remark_(\d+)$/);
652 13 50       25 next if ($1 == 1);
653 13         19 $remarks{$1} = 1;
654             }
655 1         9 for $remark_num (sort {$a <=> $b} keys %remarks) {
  34         31  
656 13         21 $self->_write_PDB_remark_record($struc, $remark_num);
657             }
658              
659 1 50       6 exists $header{'dbref'} && $self->_write_PDB_simple_record(-name => "DBREF ",
660             -annotation => $struc->annotation->get_Annotations("dbref"), -rol => "8-68");
661 1 50       4 exists $header{'seqadv'} && $self->_write_PDB_simple_record(-name => "SEQADV ",
662             -annotation => $struc->annotation->get_Annotations("seqadv"), -rol => "8-70");
663 1 50       4 exists $header{'seqres'} && $self->_write_PDB_simple_record(-name => "SEQRES ",
664             -annotation => $struc->annotation->get_Annotations("seqres"), -rol => "9-70");
665 1 50       3 exists $header{'modres'} && $self->_write_PDB_simple_record(-name => "MODRES ",
666             -annotation => $struc->annotation->get_Annotations("modres"), -rol => "8-70");
667 1 50       5 exists $header{'het'} && $self->_write_PDB_simple_record(-name => "HET ",
668             -annotation => $struc->annotation->get_Annotations("het"), -rol => "8-70");
669 1 50       5 exists $header{'hetnam'} && $self->_write_PDB_simple_record(-name => "HETNAM ",
670             -annotation => $struc->annotation->get_Annotations("hetnam"), -rol => "9-70");
671 1 50       4 exists $header{'hetsyn'} && $self->_write_PDB_simple_record(-name => "HETSYN ",
672             -annotation => $struc->annotation->get_Annotations("hetsyn"), -rol => "9-70");
673 1 50       5 exists $header{'formul'} && $self->_write_PDB_simple_record(-name => "FORMUL ",
674             -annotation => $struc->annotation->get_Annotations("formul"), -rol => "9-70");
675 1 50       4 exists $header{'helix'} && $self->_write_PDB_simple_record(-name => "HELIX ",
676             -annotation => $struc->annotation->get_Annotations("helix"), -rol => "8-76");
677 1 50       3 exists $header{'sheet'} && $self->_write_PDB_simple_record(-name => "SHEET ",
678             -annotation => $struc->annotation->get_Annotations("sheet"), -rol => "8-70");
679 1 50       3 exists $header{'turn'} && $self->_write_PDB_simple_record(-name => "TURN ",
680             -annotation => $struc->annotation->get_Annotations("turn"), -rol => "8-70");
681 1 50       3 exists $header{'ssbond'} && $self->_write_PDB_simple_record(-name => "SSBOND ",
682             -annotation => $struc->annotation->get_Annotations("ssbond"), -rol => "8-72");
683 1 50       4 exists $header{'link'} && $self->_write_PDB_simple_record(-name => "LINK ",
684             -annotation => $struc->annotation->get_Annotations("link"), -rol => "13-72");
685 1 50       4 exists $header{'hydbnd'} && $self->_write_PDB_simple_record(-name => "HYDBND ",
686             -annotation => $struc->annotation->get_Annotations("hydbnd"), -rol => "13-72");
687 1 50       3 exists $header{'sltbrg'} && $self->_write_PDB_simple_record(-name => "SLTBRG ",
688             -annotation => $struc->annotation->get_Annotations("sltbrg"), -rol => "13-72");
689 1 50       2 exists $header{'cispep'} && $self->_write_PDB_simple_record(-name => "CISPEP ",
690             -annotation => $struc->annotation->get_Annotations("cispep"), -rol => "8-59");
691 1 50       4 exists $header{'site'} && $self->_write_PDB_simple_record(-name => "SITE ",
692             -annotation => $struc->annotation->get_Annotations("site"), -rol => "8-61");
693 1 50       5 exists $header{'cryst1'} && $self->_write_PDB_simple_record(-name => "CRYST1",
694             -annotation => $struc->annotation->get_Annotations("cryst1"), -rol => "7-70");
695 1         3 for my $k (1..3) {
696 3         5 my $origxn = "origx".$k;
697 3         6 my $ORIGXN = uc($origxn)." ";
698 3 50       9 exists $header{$origxn} && $self->_write_PDB_simple_record(-name => $ORIGXN,
699             -annotation => $struc->annotation->get_Annotations($origxn), -rol => "11-55");
700             }
701 1         3 for my $k (1..3) {
702 3         5 my $scalen = "scale".$k;
703 3         5 my $SCALEN = uc($scalen)." ";
704 3 50       11 exists $header{$scalen} && $self->_write_PDB_simple_record(-name => $SCALEN,
705             -annotation => $struc->annotation->get_Annotations($scalen), -rol => "11-55");
706             }
707 1         2 for my $k (1..3) {
708 3         6 my $mtrixn = "mtrix".$k;
709 3         4 my $MTRIXN = uc($mtrixn)." ";
710 3 50       6 exists $header{$mtrixn} && $self->_write_PDB_simple_record(-name => $MTRIXN,
711             -annotation => $struc->annotation->get_Annotations($mtrixn), -rol => "8-60");
712             }
713 1 50       4 exists $header{'tvect'} && $self->_write_PDB_simple_record(-name => "TVECT ",
714             -annotation => $struc->annotation->get_Annotations("tvect"), -rol => "8-70");
715              
716             # write out coordinate section
717             #
718 1         3 my %het_res; # hetero residues
719 1         3 $het_res{'HOH'} = 1; # water is default
720 1 50       3 if (exists $header{'het'}) {
721 1         3 my ($het_line) = ($struc->annotation->get_Annotations("het"))[0]->as_text;
722 1         4 $het_line =~ s/^Value: //;
723 1         3 for ( my $k = 0; $k <= length $het_line ; $k += 63) {
724 3         5 my $l = substr $het_line, $k, 63;
725 3         9 $l =~ s/^\s*(\S+)\s+.*$/$1/;
726 3         7 $het_res{$l} = 1;
727             }
728             }
729 1         4 for my $model ($struc->get_models) {
730             # more then one model ?
731 1 50       3 if ($struc->get_models > 1) {
732 0         0 my $model_line = sprintf("MODEL %4d", $model->id);
733 0         0 $model_line .= " " x (80 - length($model_line) );
734 0         0 $self->_print($model_line, "\n");
735             }
736 1         3 for my $chain ($struc->get_chains($model)) {
737 4         9 my ($residue, $atom, $resname, $resnum, $atom_line, $atom_serial, $atom_icode, $chain_id);
738 4         0 my ($prev_resname, $prev_resnum, $prev_atomicode); # need these for TER record
739 4         4 my $last_record = ""; # Used to spot an ATOM -> HETATM change within a chain
740 4         129 $chain_id = $chain->id;
741 4 100       8 if ( $chain_id eq "default" ) {
742 1         1 $chain_id = " ";
743             }
744 4         17 $self->debug("model_id: $model->id chain_id: $chain_id\n");
745 4         10 for $residue ($struc->get_residues($chain)) {
746 60         103 ($resname, $resnum) = split /-/, $residue->id;
747 60         125 for $atom ($struc->get_atoms($residue)) {
748 171 100       247 if ($het_res{$resname}) { # HETATM
749 45 50 66     87 if ( $resname ne "HOH" && $last_record eq "ATOM " ) {
750             # going from ATOM -> HETATM, we have to write TER
751 0         0 my $ter_line = "TER ";
752 0         0 $ter_line .= sprintf("%5d", $atom_serial + 1);
753 0         0 $ter_line .= " ";
754 0         0 $ter_line .= sprintf("%3s ", $prev_resname);
755 0         0 $ter_line .= $chain_id;
756 0         0 $ter_line .= sprintf("%4d", $prev_resnum);
757 0 0       0 $ter_line .= $atom_icode ? $prev_atomicode : " "; # 27
758 0         0 $ter_line .= " " x (80 - length $ter_line); # extend to 80 chars
759 0         0 $self->_print($ter_line,"\n");
760             }
761 45         47 $atom_line = "HETATM";
762             } else {
763 126         240 $atom_line = "ATOM ";
764             }
765 171         159 $last_record = $atom_line;
766 171         247 $atom_line .= sprintf("%5d ", $atom->serial);
767 171         281 $atom_serial = $atom->serial; # we need it for TER record
768 171         249 $atom_icode = $atom->icode;
769             # remember some stuff if next iteration needs writing TER
770 171         169 $prev_resname = $resname;
771 171         171 $prev_resnum = $resnum;
772 171         144 $prev_atomicode = $atom_icode;
773             # getting the name of the atom correct is subtrivial
774 171         217 my $atom_id = $atom->id;
775             # is pdb_atomname set, then use this (most probably set when
776             # reading in the PDB record)
777 171         246 my $pdb_atomname = $atom->pdb_atomname;
778 171 50       219 if( defined $pdb_atomname ) {
779 171         259 $atom_line .= sprintf("%-4s", $pdb_atomname);
780             } else {
781             # start (educated) guessing
782 0         0 my $element = $atom->element;
783 0 0 0     0 if( defined $element && $element ne "H") {
784             # element should be at first two positions (right justified)
785             # ie. Calcium should be "CA "
786             # C alpha should be " CA "
787 0 0       0 if( length($element) == 2 ) {
788 0         0 $atom_line .= sprintf("%-4s", $atom->id);
789             } else {
790 0         0 $atom_line .= sprintf(" %-3s", $atom->id);
791             }
792             } else { # old behaviour do a best guess
793 0 0       0 if ($atom->id =~ /^\dH/) { # H: four positions, left justified
    0          
794 0         0 $atom_line .= sprintf("%-4s", $atom->id);
795             } elsif (length($atom_id) == 4) {
796 0 0       0 if ($atom_id =~ /^(H\d\d)(\d)$/) { # turn H123 into 3H12
797 0         0 $atom_line .= $2.$1;
798             } else { # no more guesses, no more alternatives
799 0         0 $atom_line .= $atom_id;
800             }
801             } else { # if we get here and it is not correct let me know
802 0         0 $atom_line .= sprintf(" %-3s", $atom->id);
803             }
804             }
805             }
806             # we don't do alternate location at this moment
807 171         167 $atom_line .= " "; # 17
808 171         190 $atom_line .= sprintf("%3s",$resname); # 18-20
809 171         178 $atom_line .= " ".$chain_id; # 21, 22
810 171         205 $atom_line .= sprintf("%4d", $resnum); # 23-26
811 171 50       230 $atom_line .= $atom->icode ? $atom->icode : " "; # 27
812 171         173 $atom_line .= " "; # 28-30
813 171         248 $atom_line .= sprintf("%8.3f", $atom->x); # 31-38
814 171         308 $atom_line .= sprintf("%8.3f", $atom->y); # 39-46
815 171         272 $atom_line .= sprintf("%8.3f", $atom->z); # 47-54
816 171         264 $atom_line .= sprintf("%6.2f", $atom->occupancy); # 55-60
817 171         276 $atom_line .= sprintf("%6.2f", $atom->tempfactor); # 61-66
818 171         205 $atom_line .= " "; # 67-72
819 171 50       222 $atom_line .= $atom->segID ? # segID 73-76
820             sprintf("%-4s", $atom->segID) :
821             " ";
822 171 50       260 $atom_line .= $atom->element ?
823             sprintf("%2s", $atom->element) :
824             " ";
825 171 50       289 $atom_line .= $atom->charge ?
826             sprintf("%2s", $atom->charge) :
827             " ";
828              
829 171         284 $self->_print($atom_line,"\n");
830             }
831             }
832             # write out TER record
833 4 100       11 if ( $resname ne "HOH" ) {
834 3         4 my $ter_line = "TER ";
835 3         8 $ter_line .= sprintf("%5d", $atom_serial + 1);
836 3         4 $ter_line .= " ";
837 3         4 $ter_line .= sprintf("%3s ", $resname);
838 3         5 $ter_line .= $chain_id;
839 3         5 $ter_line .= sprintf("%4d", $resnum);
840 3 50       6 $ter_line .= $atom_icode ? $atom_icode : " "; # 27
841 3         6 $ter_line .= " " x (80 - length $ter_line); # extend to 80 chars
842 3         5 $self->_print($ter_line,"\n");
843             }
844             }
845 1 50       4 if ($struc->get_models > 1) { # we need ENDMDL
846 0         0 my $endmdl_line = "ENDMDL" . " " x 74;
847 0         0 $self->_print($endmdl_line, "\n");
848             }
849             } # for my $model
850              
851             # CONECT
852 1         4 my @sources = $struc->get_all_conect_source;
853 1         3 my ($conect_line,@conect, @bond, @hydbond, @saltbridge, $to, $type);
854 1         2 for my $source (@sources) {
855             # get all conect's
856 8         13 my @conect = $struc->conect($source);
857             # classify
858 8         9 for my $con (@conect) {
859 12         19 ($to, $type) = split /_/, $con;
860 12 50       19 if($type eq "bond") {
    0          
    0          
861 12         19 push @bond, $to;
862             } elsif($type eq "hydrogenbonded") {
863 0         0 push @hydbond, $to;
864             } elsif($type eq "saltbridged") {
865 0         0 push @saltbridge, $to;
866             } else {
867 0         0 $self->throw("type $type is unknown for conect");
868             }
869             }
870             # and write out CONECT lines as long as there is something
871             # in one of the arrays
872 8   66     12 while ( @bond || @hydbond || @saltbridge) {
      66        
873 8         9 my ($b, $hb, $sb);
874 8         17 $conect_line = "CONECT". sprintf("%5d", $source);
875 8         10 for my $k (0..3) {
876 32         31 $b = shift @bond;
877 32 100       44 $conect_line .= $b ? sprintf("%5d", $b) : " ";
878             }
879 8         9 for my $k (4..5) {
880 16         14 $hb = shift @hydbond;
881 16 50       19 $conect_line .= $hb ? sprintf("%5d", $hb) : " ";
882             }
883 8         9 $sb = shift @saltbridge;
884 8 50       14 $conect_line .= $sb ? sprintf("%5d", $sb) : " ";
885 8         10 for my $k (7..8) {
886 16         16 $hb = shift @hydbond;
887 16 50       17 $conect_line .= $hb ? sprintf("%5d", $hb) : " ";
888             }
889 8         9 $sb = shift @saltbridge;
890 8 50       11 $conect_line .= $sb ? sprintf("%5d", $sb) : " ";
891              
892 8         10 $conect_line .= " " x (80 - length($conect_line) );
893 8         13 $self->_print($conect_line, "\n");
894             }
895             }
896              
897             # MASTER line contains checksums, we should calculate them of course :)
898 1         3 my $master_line = "MASTER " . $struc->master;
899 1         3 $master_line .= " " x (80 - length($master_line) );
900 1         3 $self->_print($master_line, "\n");
901              
902 1         2 my $end_line = "END" . " " x 77;
903 1         2 $self->_print($end_line,"\n");
904              
905             }
906              
907             =head2 _filehandle
908              
909             Title : _filehandle
910             Usage : $obj->_filehandle($newval)
911             Function:
912             Example :
913             Returns : value of _filehandle
914             Args : newvalue (optional)
915              
916             =cut
917              
918             sub _filehandle{
919 0     0   0 my ($obj,$value) = @_;
920 0 0       0 if( defined $value) {
921 0         0 $obj->{'_filehandle'} = $value;
922             }
923 0         0 return $obj->{'_filehandle'};
924              
925             }
926              
927             =head2 _noatom
928              
929             Title : _noatom
930             Usage : $obj->_noatom($newval)
931             Function:
932             Example :
933             Returns : value of _noatom
934             Args : newvalue (optional)
935              
936              
937             =cut
938              
939             sub _noatom{
940 2     2   4 my ($obj,$value) = @_;
941 2 50       4 if( defined $value) {
942 0         0 $obj->{'_noatom'} = $value;
943             }
944 2         3 return $obj->{'_noatom'};
945              
946             }
947              
948             =head2 _noheader
949              
950             Title : _noheader
951             Usage : $obj->_noheader($newval)
952             Function:
953             Example :
954             Returns : value of _noheader
955             Args : newvalue (optional)
956              
957              
958             =cut
959              
960             sub _noheader{
961 2     2   3 my ($obj,$value) = @_;
962 2 50       5 if( defined $value) {
963 0         0 $obj->{'_noheader'} = $value;
964             }
965 2         3 return $obj->{'_noheader'};
966              
967             }
968              
969             =head2 _read_PDB_singlecontline
970              
971             Title : _read_PDB_singlecontline
972             Usage : $obj->_read_PDB_singlecontline($record, $fromto, $buffer))
973             Function: read single continued record from PDB
974             Returns : concatenated record entry (between $fromto columns)
975             Args : record, colunm delimiters, buffer
976              
977             =cut
978              
979             sub _read_PDB_singlecontline {
980 9     9   14 my ($self, $record, $fromto, $buffer) = @_;
981 9         9 my $concat_line;
982              
983 9         18 my ($begin, $end) = (split (/-/, $fromto));
984 9         13 my $unpack_string = "x8 a2 ";
985 9 50       15 if($begin == 12) { # one additional space
986 0         0 $unpack_string .= "x1 a59";
987             } else {
988 9         12 $unpack_string .= "a60";
989             }
990 9         10 $_ = $$buffer;
991 9   66     15 while (defined( $_ ||= $self->_readline ) ) {
992 25 100       123 if ( /^$record/ ) {
993 16         43 my($cont, $rol) = unpack $unpack_string, $_;
994 16 100 66     41 if($cont =~ /\d$/ && $begin == 11) { # continuation line
995             # and text normally at pos 11
996 7         16 $rol =~ s/^\s//; # strip leading space
997             }
998             ## no space (store litteraly) $concat_line .= $rol . " ";
999 16         27 $concat_line .= $rol;
1000             } else {
1001 9         13 last;
1002             }
1003              
1004 16         44 $_ = undef;
1005             }
1006 9         22 $concat_line =~ s/\s$//; # remove trailing space
1007 9         14 $$buffer = $_;
1008              
1009 9         14 return $concat_line;
1010             }
1011              
1012              
1013             =head2 _read_PDB_jrnl
1014              
1015             Title : _read_PDB_jrnl
1016             Usage : $obj->_read_PDB_jrnl($\buffer))
1017             Function: read jrnl record from PDB
1018             Returns : Bio::Annotation::Reference object
1019             Args :
1020              
1021             =cut
1022              
1023             sub _read_PDB_jrnl {
1024 2     2   4 my ($self, $buffer) = @_;
1025              
1026 2         2 $_ = $$buffer;
1027 2         2 my ($auth, $titl,$edit,$ref,$publ,$refn, $pmid, $doi);
1028 2   66     5 while (defined( $_ ||= $self->_readline )) {
1029 15 100       37 if (/^JRNL /) {
1030             # this code belgons in a seperate method (shared with
1031             # remark 1 parsing)
1032 13         38 my ($rec, $subr, $cont, $rol) = unpack "A6 x6 A4 A2 x1 A51", $_;
1033 13 100       30 $auth = $self->_concatenate_lines($auth,$rol) if ($subr eq "AUTH");
1034 13 100       20 $titl = $self->_concatenate_lines($titl,$rol) if ($subr eq "TITL");
1035 13 50       18 $edit = $self->_concatenate_lines($edit,$rol) if ($subr eq "EDIT");
1036 13 100       18 $ref = $self->_concatenate_lines($ref ,$rol) if ($subr eq "REF");
1037 13 50       20 $publ = $self->_concatenate_lines($publ,$rol) if ($subr eq "PUBL");
1038 13 100       16 $refn = $self->_concatenate_lines($refn,$rol) if ($subr eq "REFN");
1039 13 50       16 $pmid = $self->_concatenate_lines($pmid,$rol) if ($subr eq "PMID");
1040 13 50       16 $doi = $self->_concatenate_lines($doi,$rol) if ($subr eq "DOI");
1041             } else {
1042 2         2 last;
1043             }
1044              
1045 13         38 $_ = undef; # trigger reading of next line
1046             } # while
1047              
1048 2         4 $$buffer = $_;
1049 2         9 my $jrnl_ref = Bio::Annotation::Reference->new;
1050              
1051 2         6 $jrnl_ref->authors($auth);
1052 2         5 $jrnl_ref->title($titl);
1053 2         5 $jrnl_ref->location($ref);
1054 2         4 $jrnl_ref->publisher($publ);
1055 2         3 $jrnl_ref->editors($edit);
1056 2         7 $jrnl_ref->encoded_ref($refn);
1057 2         4 $jrnl_ref->pubmed($pmid);
1058 2         5 $jrnl_ref->doi($doi);
1059              
1060 2         4 return $jrnl_ref;
1061             } # sub _read_PDB_jrnl
1062              
1063              
1064             =head2 _read_PDB_remark_1
1065              
1066             Title : _read_PDB_remark_1
1067             Usage : $obj->_read_PDB_remark_1($\buffer))
1068             Function: read "remark 1" record from PDB
1069             Returns : array of Bio::Annotation::Reference objects
1070             Args :
1071              
1072             =cut
1073              
1074             sub _read_PDB_remark_1 {
1075 2     2   3 my ($self, $buffer) = @_;
1076              
1077 2         3 $_ = $$buffer;
1078 2         4 my ($auth, $titl,$edit,$ref,$publ,$refn,$refnum,$pmid, $doi);
1079 2         0 my @refs;
1080              
1081 2   66     5 while (defined( $_ ||= $self->_readline )) {
1082 59 100       126 if (/^REMARK 1 /) {
1083 57 100       103 if (/^REMARK 1\s+REFERENCE\s+(\d+)\s*/) {
1084 8         17 $refnum = $1;
1085 8 100       11 if ($refnum != 1) { # this is first line of a reference
1086 7         16 my $rref = Bio::Annotation::Reference->new;
1087 7         10 $rref->authors($auth);
1088 7         12 $rref->title($titl);
1089 7         12 $rref->location($ref);
1090 7         13 $rref->publisher($publ);
1091 7         13 $rref->editors($edit);
1092 7         12 $rref->encoded_ref($refn);
1093 7         11 $rref->pubmed($pmid);
1094 7         12 $rref->doi($doi);
1095 7         8 $auth = $titl = $edit = $ref = $publ = $refn = undef;
1096 7         10 push @refs, $rref;
1097             }
1098             } else {
1099             # this code belgons in a seperate method (shared with
1100             # remark 1 parsing)
1101 49         137 my ($rec, $subr, $cont, $rol) = unpack "A6 x6 A4 A2 x1 A51", $_;
1102 49 100       83 $auth = $self->_concatenate_lines($auth,$rol) if ($subr eq "AUTH");
1103 49 100       74 $titl = $self->_concatenate_lines($titl,$rol) if ($subr eq "TITL");
1104 49 50       84 $edit = $self->_concatenate_lines($edit,$rol) if ($subr eq "EDIT");
1105 49 100       62 $ref = $self->_concatenate_lines($ref ,$rol) if ($subr eq "REF");
1106 49 50       58 $publ = $self->_concatenate_lines($publ,$rol) if ($subr eq "PUBL");
1107 49 100       64 $refn = $self->_concatenate_lines($refn,$rol) if ($subr eq "REFN");
1108 49 50       59 $pmid = $self->_concatenate_lines($pmid,$rol) if ($subr eq "PMID");
1109 49 50       61 $doi = $self->_concatenate_lines($doi,$rol) if ($subr eq "DOI");
1110             }
1111             } else {
1112             # have we seen any reference at all (could be single REMARK 1 line
1113 2 100       5 if ( ! defined ($refnum) ) {
1114 1         2 last; # get out of while()
1115             }
1116              
1117             # create last reference
1118 1         3 my $rref = Bio::Annotation::Reference->new;
1119 1         3 $rref->authors($auth);
1120 1         2 $rref->title($titl);
1121 1         1 $rref->location($ref);
1122 1         2 $rref->publisher($publ);
1123 1         2 $rref->editors($edit);
1124 1         2 $rref->encoded_ref($refn);
1125 1         2 $rref->pubmed($pmid);
1126 1         2 $rref->doi($doi);
1127 1         1 push @refs, $rref;
1128 1         1 last;
1129             }
1130              
1131 57         111 $_ = undef; # trigger reading of next line
1132             } # while
1133              
1134 2         4 $$buffer = $_;
1135              
1136 2         5 return @refs;
1137             } # sub _read_PDB_jrnl
1138              
1139              
1140             =head2 _read_PDB_coordinate_section
1141              
1142             Title : _read_PDB_coordinate_section
1143             Usage : $obj->_read_PDB_coordinate_section($\buffer))
1144             Function: read one model from a PDB
1145             Returns : Bio::Structure::Model object
1146             Args :
1147              
1148             =cut
1149              
1150             sub _read_PDB_coordinate_section {
1151 2     2   4 my ($self, $buffer, $struc) = @_;
1152 2         2 my ($model_num, $chain_name, $residue_name, $atom_name); # to keep track of state
1153 2         4 $model_num = "";
1154 2         2 $chain_name = "";
1155 2         1 $residue_name = "";
1156 2         2 $atom_name = "";
1157              
1158 2         2 my $atom_unpack = "x6 a5 x1 a4 a1 a3 x1 a1 a4 a1 x3 a8 a8 a8 a6 a6 x6 a4 a2 a2";
1159 2         2 my $anisou_unpack = "x6 a5 x1 a4 a1 a3 x1 a1 a4 a1 x1 a7 a7 a7 a7 a7 a7 a4 a2 a2";
1160              
1161 2         14 my $model = Bio::Structure::Model->new;
1162 2         6 $model->id('default');
1163 2         5 my $noatom = $self->_noatom;
1164 2         7 my ($chain, $residue, $atom, $old);
1165 2         0 my (%_ch_in_model); # which chains are already in this model
1166              
1167 2         4 $_ = $$buffer;
1168 2   66     7 while (defined( $_ ||= $self->_readline )) {
1169             # start of a new model
1170 659 50       1078 if (/^MODEL\s+(\d+)/) {
1171 0         0 $model_num = $1;
1172 0         0 $self->debug("_read_PDB_coor: parsing model $model_num\n");
1173 0         0 $model->id($model_num);
1174 0 0       0 if (/^MODEL\s+\d+\s+\S+/) { # old format (pre 2.1)
1175 0         0 $old = 1;
1176             }
1177             }
1178             # old hier ook setten XXX
1179             # ATOM lines, if first set chain
1180 659 100       1713 if (/^(ATOM |HETATM|SIGATM)/) {
1181 653         3150 my @line_elements = unpack $atom_unpack, $_;
1182 653         808 my $pdb_atomname = $line_elements[1]; # need to get this before removing spaces
1183 653         1109 for my $k (0 .. $#line_elements) {
1184 9795         15741 $line_elements[$k] =~ s/^\s+//; # remove leading space
1185 9795         12033 $line_elements[$k] =~ s/\s+$//; # remove trailing space
1186 9795 100       16256 $line_elements[$k] = undef if ($line_elements[$k] =~ /^\s*$/);
1187             }
1188 653         1485 my ($serial, $atomname, $altloc, $resname, $chainID, $resseq, $icode, $x, $y, $z,
1189             $occupancy, $tempfactor, $segID, $element, $charge) = @line_elements;
1190 653 100       961 $chainID = 'default' if ( !defined $chainID );
1191 653 100       821 if ($chainID ne $chain_name) { # possibly a new chain
1192             # fix for bug #1187
1193             # we can have ATOM/HETATM of an already defined chain (A B A B)
1194             # e.g. 1abm
1195              
1196 5 50       18 if (exists $_ch_in_model{$chainID} ) { # we have already seen this chain in this model
1197 0         0 $chain = $_ch_in_model{$chainID};
1198             } else { # we create a new chain
1199 5         20 $chain = Bio::Structure::Chain->new;
1200 5         16 $struc->add_chain($model,$chain);
1201 5         12 $chain->id($chainID);
1202 5         7 $_ch_in_model{$chainID} = $chain;
1203             }
1204 5         10 $chain_name = $chain->id;
1205             }
1206             #my $res_name_num = $resname."-".$resseq;
1207 653         816 my $res_name_num = $resname."-".$resseq;
1208 653 50       792 $res_name_num .= '.'.$icode if $icode;
1209 653 100       763 if ($res_name_num ne $residue_name) { # new residue
1210 157         301 $residue = Bio::Structure::Residue->new;
1211 157         378 $struc->add_residue($chain,$residue);
1212 157         272 $residue->id($res_name_num);
1213 157         176 $residue_name = $res_name_num;
1214 157         161 $atom_name = ""; # only needed inside a residue
1215             }
1216             # get out of here if we don't want the atom objects
1217 653 50       813 if ($noatom) {
1218 0         0 $_ = undef;
1219 0         0 next;
1220             }
1221             # alternative location: only take first one
1222 653 0 33     850 if ( $altloc && ($altloc =~ /\S+/) && ($atomname eq $atom_name) ) {
      33        
1223 0         0 $_ = undef; # trigger reading next line
1224 0         0 next;
1225             }
1226 653 50       1384 if (/^(ATOM |HETATM)/) { # ATOM / HETATM
1227 653         746 $atom_name = $atomname;
1228 653         1195 $atom = Bio::Structure::Atom->new;
1229 653         1388 $struc->add_atom($residue,$atom);
1230 653         1304 $atom->id($atomname);
1231 653         1168 $atom->pdb_atomname($pdb_atomname); # store away PDB atomname for writing out
1232 653         1088 $atom->serial($serial);
1233 653         1008 $atom->icode($icode);
1234 653         1032 $atom->x($x);
1235 653         1292 $atom->y($y);
1236 653         1059 $atom->z($z);
1237 653         1365 $atom->occupancy($occupancy);
1238 653         1015 $atom->tempfactor($tempfactor);
1239 653         1083 $atom->segID($segID); # deprecated but used by people
1240 653 50       917 if (! $old ) {
1241 653         1073 $atom->element($element);
1242 653         913 $atom->charge($charge);
1243             }
1244             }
1245             else { # SIGATM
1246 0         0 my $sigx = $x;
1247 0         0 my $sigy = $y;
1248 0         0 my $sigz = $z;
1249 0         0 my $sigocc = $occupancy;
1250 0         0 my $sigtemp = $tempfactor;
1251 0 0       0 if ($atom_name ne $atomname) { # something wrong with PDB file
1252 0         0 $self->throw("A SIGATM record should have the same $atomname as the previous record $atom_name\n");
1253             }
1254 0         0 $atom->sigx($sigx);
1255 0         0 $atom->sigy($sigy);
1256 0         0 $atom->sigz($sigz);
1257 0         0 $atom->sigocc($sigocc);
1258 0         0 $atom->sigtemp($sigtemp);
1259              
1260             }
1261             } # ATOM|HETARM|SIGATM
1262              
1263             # ANISOU | SIGUIJ lines
1264 659 50       1502 if (/^(ANISOU|SIGUIJ)/) {
1265 0 0       0 if ($noatom) {
1266 0         0 $_ = undef;
1267 0         0 next;
1268             }
1269 0         0 my @line_elements = unpack $anisou_unpack, $_;
1270 0         0 for my $k (0 .. $#line_elements) {
1271 0         0 $line_elements[$k] =~ s/^\s+//; # remove leading space
1272 0         0 $line_elements[$k] =~ s/\s+$//; # remove trailing space
1273 0 0       0 $line_elements[$k] = undef if ($line_elements[$k] =~ /^\s*$/);
1274             }
1275 0         0 my ($serial, $atomname, $altloc, $resname, $chainID, $resseq, $icode,
1276             $u11,$u22, $u33, $u12, $u13, $u23, $segID, $element, $charge) = @line_elements;
1277 0         0 $self->debug("read_PDB_coor: parsing ANISOU record: $serial $atomname\n");
1278 0 0 0     0 if ( $altloc && ($altloc =~ /\S+/) && ($atomname eq $atom_name) ) {
      0        
1279 0         0 $_ = undef;
1280 0         0 next;
1281             }
1282 0 0       0 if (/^ANISOU/) {
1283 0 0       0 if ($atom_name ne $atomname) { # something wrong with PDB file
1284 0         0 $self->throw("A ANISOU record should have the same $atomname as the previous record $atom_name\n");
1285             }
1286 0         0 $atom->aniso("u11",$u11);
1287 0         0 $atom->aniso("u22",$u22);
1288 0         0 $atom->aniso("u33",$u33);
1289 0         0 $atom->aniso("u12",$u12);
1290 0         0 $atom->aniso("u13",$u13);
1291 0         0 $atom->aniso("u23",$u23);
1292             }
1293             else { # SIGUIJ
1294 0 0       0 if ($atom_name ne $atomname) { # something wrong with PDB file
1295 0         0 $self->throw("A SIGUIJ record should have the same $atomname as the previous record $atom_name\n");
1296             }
1297             # could use different variable names, but hey ...
1298 0         0 $atom->aniso("sigu11",$u11);
1299 0         0 $atom->aniso("sigu22",$u22);
1300 0         0 $atom->aniso("sigu33",$u33);
1301 0         0 $atom->aniso("sigu12",$u12);
1302 0         0 $atom->aniso("sigu13",$u13);
1303 0         0 $atom->aniso("sigu23",$u23);
1304             }
1305             } # ANISOU | SIGUIJ
1306              
1307 659 100       926 if (/^TER /) {
1308 4         8 $_ = undef;
1309 4         12 next;
1310             }
1311              
1312 655 50       831 if (/^ENDMDL/) {
1313 0         0 $_ = $self->_readline;
1314 0         0 last;
1315             }
1316              
1317 655 100       912 if (/^(CONECT|MASTER)/) { # get out of here
1318             # current line is OK
1319 2         5 last;
1320             }
1321 653         1805 $_ = undef;
1322              
1323             } # while
1324              
1325 2         5 $$buffer = $_;
1326              
1327 2         7 return $model;
1328             } # _read_PDB_coordinate_section
1329              
1330              
1331             sub _write_PDB_simple_record {
1332 38     38   73 my ($self, @args) = @_;
1333 38         96 my ($name, $cont , $annotation, $rol, $string) =
1334             $self->_rearrange([qw(
1335             NAME
1336             CONT
1337             ANNOTATION
1338             ROL
1339             STRING
1340             )],
1341             @args);
1342 38 50 66     95 if (defined $string && defined $annotation) {
1343 0         0 $self->throw("you can only supply one of -annoation or -string");
1344             }
1345 38         41 my ($output_string, $ann_string, $t_string);
1346 38         133 my ($rol_begin, $rol_end) = $rol =~ /^(\d+)-(\d+)$/;
1347 38         72 my $rol_length = $rol_end - $rol_begin +1;
1348 38 100       51 if ($string) {
1349 4 100       9 if (length $string > $rol_length) {
1350             # we might need to split $string in multiple lines
1351 2         4 while (length $string > $rol_length) {
1352             # other option might be to go for a bunch of substr's
1353 3         29 my @c = split//,$string;
1354 3         3 my $t = $rol_length; # index into @c
1355 3         8 while ($c[$t] ne " ") { # find first space, going backwards
1356 21         53 $self->debug("c[t]: $c[$t] $t\n");
1357 21         21 $t--;
1358 21 50       36 if ($t == 0) { $self->throw("Found no space for $string\n"); }
  0         0  
1359             }
1360 3         9 $self->debug("t: $t rol_length: $rol_length\n");
1361 3         6 $ann_string .= substr($string, 0, $t);
1362 3         7 $self->debug("ann_string: $ann_string\n");
1363 3         6 $ann_string .= " " x ($rol_length - $t );
1364 3         5 $string = substr($string, $t+1);
1365 3         5 $string =~ s/^\s+//;
1366 3         10 $self->debug("ann_string: $ann_string~~\nstring: $string~~\n");
1367             }
1368 2         3 $ann_string .= $string;
1369             } else {
1370 2         3 $ann_string = $string;
1371             }
1372             } else {
1373 34         64 $ann_string = $annotation->as_text;
1374 34         103 $ann_string =~ s/^Value: //;
1375             }
1376             # ann_string contains the thing to write out, writing out happens below
1377 38         50 my $ann_length = length $ann_string;
1378              
1379 38         116 $self->debug("ann_string: $ann_string\n");
1380 38 100       58 if ($cont) {
1381 10         32 my ($c_begin, $c_end) = $cont =~ /^(\d+)-(\d+)$/;
1382 10 100       16 if ( $ann_length > $rol_length ) { # we need to continuation lines
1383 6         7 my $first_line = 1;
1384 6         6 my $cont_number = 2;
1385 6         7 my $out_line;
1386 6         6 my $num_pos = $rol_length;
1387 6         8 my $i = 0;
1388 6         8 while( $i < $ann_length ) {
1389 15         20 $t_string = substr($ann_string, $i, $num_pos);
1390 15         36 $self->debug("t_string: $t_string~~$i $num_pos\n");
1391 15 100       21 if ($first_line) {
1392 6         15 $out_line = $name . " " x ($rol_begin - $c_begin) . $t_string;
1393 6         10 $out_line .= " " x (80 - length($out_line) ) . "\n";
1394 6         7 $first_line = 0;
1395 6         9 $output_string = $out_line;
1396 6         4 $i += $num_pos; # first do counter
1397 6 100       14 if ($rol_begin - $c_end == 1) { # next line one character less
1398 4         9 $num_pos--;
1399             }
1400             } else {
1401 9         20 $out_line = $name . sprintf("%2d",$cont_number);
1402             # a space after continuation number
1403 9 100       13 if ($rol_begin - $c_end == 1) { # one space after cont number
1404 6         6 $out_line .= " ";
1405 6         6 $out_line .= $t_string;
1406             } else {
1407 3         6 $out_line .= " " x ($rol_begin - $c_end - 1) . $t_string;
1408             }
1409 9         17 $out_line .= " " x (80 -length($out_line) ) . "\n";
1410 9         8 $cont_number++;
1411 9         11 $output_string .= $out_line;
1412 9         14 $i += $num_pos;
1413             }
1414             }
1415             } else { # no continuation
1416 4         7 my $spaces = $rol_begin - $c_begin; # number of spaces need to insert
1417 4         7 $output_string = $name . " " x $spaces . $ann_string;
1418 4         8 $output_string .= " " x (80 - length($output_string) );
1419             }
1420             } else { # no contintuation lines
1421 28 50       42 if ($ann_length < $rol_length) {
1422 0         0 $output_string = $name . $ann_string;
1423 0         0 $output_string .= " " x (80 - length($output_string) );
1424             } else {
1425 28         42 for (my $i = 0; $i < $ann_length; $i += $rol_length) {
1426 249         199 my $out_line;
1427 249         260 $t_string = substr($ann_string, $i, $rol_length);
1428 249         219 $out_line = $name . $t_string;
1429 249         291 $out_line .= " " x (80 -length($out_line) ) . "\n";
1430 249         389 $output_string .= $out_line;
1431             }
1432             }
1433             }
1434 38         115 $output_string =~ s/\n$//; # remove trailing newline
1435 38         104 $self->_print("$output_string\n");
1436              
1437             }
1438              
1439             sub _write_PDB_remark_record {
1440 13     13   19 my ($self, $struc, $remark_num) = @_;
1441 13         25 my ($ann) = $struc->annotation->get_Annotations("remark_$remark_num");
1442 13         36 my $name = sprintf("REMARK %3d ",$remark_num);
1443 13         23 $self->_write_PDB_simple_record(-name => $name, -annotation => $ann, -rol => "12-70");
1444             }
1445              
1446             1;